import * as cheerio from 'cheerio';
import { Logger } from './Logger';

// Module-level logger used by the static methods of DataParser below.
const logger = Logger.getInstance().getLogger();

/**
 * Utility class for parsing Terabox share-page HTML.
 *
 * The share parameters are read from the inline `<script>` whose text mentions
 * `file_list`; individual values are pulled out of that script text rather than
 * parsing the whole script as JSON.
 */
export class DataParser {

    /**
     * Extracts the share parameters (share ID, uk, sign, timestamp, file list)
     * from the HTML of a share page.
     *
     * Scalar values are accepted whether they are serialised as JSON strings
     * (`"shareid":"123"`) or as bare integers (`"shareid":123`) and are always
     * returned as strings. The file list is located by bracket matching, so
     * nested arrays and `]` characters inside string values are handled.
     *
     * @param html The HTML content of the Terabox share page.
     * @returns The extracted parameters, or null if the data script is missing,
     *          any parameter is absent, or the file list is not valid JSON.
     */
    public static extractShareParams(html: string): { shareId: string, uk: string, sign: string, timestamp: string, fileList: any[] } | null {
        logger.debug('Starting HTML content parsing for share parameters...');
        try {
            const script = DataParser.findShareScript(html);
            if (script === null) {
                logger.warn('Could not find the script tag containing file_list data.');
                return null;
            }

            const fileListJson = DataParser.sliceJsonArray(script, 'file_list');
            const shareId = DataParser.readScalar(script, 'shareid');
            const uk = DataParser.readScalar(script, 'uk');
            const sign = DataParser.readScalar(script, 'sign');
            const timestamp = DataParser.readScalar(script, 'timestamp');

            if (fileListJson === null || shareId === null || uk === null || sign === null || timestamp === null) {
                logger.warn('Failed to extract all necessary parameters from the script content.');
                return null;
            }

            // A malformed list makes JSON.parse throw; the catch below turns that into null.
            const fileList: unknown = JSON.parse(fileListJson);
            if (!Array.isArray(fileList)) {
                logger.warn('Failed to extract all necessary parameters from the script content.');
                return null;
            }

            const params = { shareId, uk, sign, timestamp, fileList };

            logger.info(params, 'Successfully extracted all share parameters.');
            return params;

        } catch (error: unknown) {
            const detail = error instanceof Error && error.message ? error.message : error;
            logger.error({ error: detail }, 'Error during HTML parsing in DataParser.extractShareParams');
            return null;
        }
    }

    /**
     * Extracts the fs_id of the first entry in the file list.
     *
     * @param fileList The list of files extracted from the share page.
     * @returns The first entry's fs_id as a string, or null when the list is
     *          missing/empty, the first entry is null, or it has no truthy fs_id.
     */
    public static extractFsId(fileList: any[]): string | null {
        // Optional chaining guards against a null/undefined list or first entry.
        const fsId = fileList?.[0]?.fs_id;
        if (fsId) {
            return fsId.toString();
        }
        logger.warn('Could not extract fs_id from the file list.');
        return null;
    }

    /**
     * Returns the text of the first `<script>` element whose content contains
     * `file_list`, or null when no such element exists.
     */
    private static findShareScript(html: string): string | null {
        const $ = cheerio.load(html);
        let found: string | null = null;

        $('script').each((_index, element) => {
            const content = $(element).html();
            if (content && content.includes('file_list')) {
                found = content;
                return false; // Returning false stops the cheerio iteration.
            }
            return undefined;
        });

        return found;
    }

    /**
     * Returns the raw JSON text of the array assigned to `"key"` in `text`,
     * from its opening `[` through the matching `]`, or null when the key is
     * absent or the array is never closed.
     */
    private static sliceJsonArray(text: string, key: string): string | null {
        const opener = new RegExp('"' + key + '"\\s*:\\s*\\[').exec(text);
        if (!opener) {
            return null;
        }

        // Index of the '[' that the opener pattern ended on.
        const start = opener.index + opener[0].length - 1;
        let depth = 0;
        let inString = false;
        let escaped = false;

        for (let i = start; i < text.length; i++) {
            const ch = text[i];

            if (inString) {
                // Brackets inside string literals must not affect the depth count.
                if (escaped) {
                    escaped = false;
                } else if (ch === '\\') {
                    escaped = true;
                } else if (ch === '"') {
                    inString = false;
                }
                continue;
            }

            if (ch === '"') {
                inString = true;
            } else if (ch === '[') {
                depth++;
            } else if (ch === ']') {
                depth--;
                if (depth === 0) {
                    return text.slice(start, i + 1);
                }
            }
        }

        return null;
    }

    /**
     * Returns the value assigned to `"key"` in `text` as a string. Both quoted
     * values and bare integers are accepted; null is returned when the key is
     * not present in either form.
     */
    private static readScalar(text: string, key: string): string | null {
        const match = new RegExp('"' + key + '"\\s*:\\s*(?:"([^"]*)"|(-?\\d+))').exec(text);
        if (!match) {
            return null;
        }
        return match[1] ?? match[2] ?? null;
    }
}