import { FileInfo, ScraperConfig, ScraperResponse, ScraperError, DownloadProgress, ProgressCallback, DownloadResult, TeraboxApiFileResponse, bytesToHuman } from '../types';
import { Logger } from '../utils/Logger';
import { HttpClient } from '../utils/HttpClient';
import { DataParser } from '../utils/DataParser';
import * as fs from 'fs';
import * as path from 'path';
import { Writable } from 'stream';

const logger = Logger.getInstance().getLogger();

/** File name used when the remote metadata does not yield a usable one. */
const FALLBACK_FILE_NAME = 'unknown_file';

/** Minimum duration of one speed-sampling window, in milliseconds. */
const SPEED_WINDOW_MS = 1000;

/**
 * Produces a human-readable message for an arbitrary thrown value.
 * @param e The value caught by a `catch` clause.
 * @returns `e.message` when present, otherwise the stringified value.
 */
function describeError(e: unknown): string {
  if (e instanceof Error) {
    return e.message;
  }
  if (typeof e === 'object' && e !== null && 'message' in e) {
    return String((e as { message: unknown }).message);
  }
  return String(e);
}

/**
 * Reads `response.status` from a thrown value, if it has that shape.
 * @param e The value caught by a `catch` clause.
 * @returns The numeric status, or undefined when the value carries none.
 */
function httpStatusOf(e: unknown): number | undefined {
  if (typeof e !== 'object' || e === null) {
    return undefined;
  }
  const response = (e as { response?: unknown }).response;
  if (typeof response !== 'object' || response === null) {
    return undefined;
  }
  const status = (response as { status?: unknown }).status;
  return typeof status === 'number' ? status : undefined;
}

/**
 * The core class for the Terabox Downloader and Scraper library.
 * It handles the entire process: link parsing, API interaction, and file downloading.
 */
export class TeraboxScraper {
  private readonly config: ScraperConfig;
  private readonly httpClient: HttpClient;
  private readonly TERABOX_DOMAIN = 'www.terabox.app';
  private readonly API_INFO_URL = 'https://www.terabox.app/share/list?app_id=250528&web=1&channel=share';

  /**
   * Initializes the TeraboxScraper with configuration.
   * A missing or `ndus`-less cookie only produces a warning; construction still succeeds.
   * @param config The configuration object including the required Terabox cookie.
   */
  constructor(config: ScraperConfig) {
    if (!config.cookie || !config.cookie.includes('ndus=')) {
      logger.warn('Configuration missing or invalid Terabox cookie (ndus). Operations may fail.');
    }

    this.config = {
      ...config,
      userAgent: config.userAgent || 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 XbibzScraper/1.0.0',
    };
    this.httpClient = new HttpClient(this.config.proxy, this.config.userAgent);

    logger.info({ cookiePresent: !!config.cookie, proxy: !!config.proxy }, 'TeraboxScraper initialized.');
  }

  /**
   * Standardized error creation utility. Logs the failure and wraps it in a response.
   * @param code Error code.
   * @param message Error message.
   * @param originalError Optional original error object (or message) for diagnostics.
   * @returns A ScraperResponse with success: false and the error details.
   */
  // `originalError` stays `any` so it remains assignable to ScraperError, whose
  // declaration is not visible from this file.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  private createErrorResponse(code: string, message: string, originalError?: any): ScraperResponse<never> {
    const error: ScraperError = { code, message, originalError };
    logger.error({ originalError, code, message }, `Operation failed: [${code}] ${message}`);
    return { success: false, error };
  }

  /**
   * Extracts the share key (surl) from a Terabox share link.
   * The host must be the Terabox domain itself or a subdomain of it, and the
   * path must start with `/s/`. Only the first path segment after `/s/` is used.
   * @param shareLink The full Terabox share URL.
   * @returns The share key (surl) or null if invalid.
   */
  private extractShareKey(shareLink: string): string | null {
    let url: URL;
    try {
      url = new URL(shareLink);
    } catch {
      logger.warn({ shareLink }, 'Invalid share link format provided.');
      return null;
    }

    // Exact or dot-suffix match: a substring test would also accept lookalike
    // hosts such as "www.terabox.app.evil.example".
    const host = url.hostname;
    const hostMatches = host === this.TERABOX_DOMAIN || host.endsWith(`.${this.TERABOX_DOMAIN}`);
    if (!hostMatches || !url.pathname.startsWith('/s/')) {
      return null;
    }

    // Pathname is typically /s/{surl}; drop any trailing slash or extra segments.
    const [surl = ''] = url.pathname.slice('/s/'.length).split('/');
    return surl !== '' ? surl : null;
  }

  /**
   * Resolves the on-disk destination for a download.
   * `savePath` is used verbatim only when it is absolute and has an extension;
   * otherwise it is treated as a directory and the file name is appended.
   * @param savePath Caller-supplied directory or full file path.
   * @param fileName File name reported by the remote service (untrusted).
   * @returns The path the file will be written to.
   */
  private resolveTargetPath(savePath: string, fileName: string): string {
    if (path.isAbsolute(savePath) && path.extname(savePath) !== '') {
      return savePath;
    }
    // The name comes from the remote API; keep only its final component so a
    // value like "../../x" cannot escape the requested directory.
    const baseName = path.basename(fileName);
    const safeName = baseName === '' || baseName === '.' || baseName === '..' ? FALLBACK_FILE_NAME : baseName;
    return path.join(savePath, safeName);
  }

  /**
   * Fetches file information and the direct download link from a Terabox share URL.
   * This is a two-step process:
   * 1. Scrape the initial share page to get hidden parameters (shareid, uk, sign, timestamp).
   * 2. Use the parameters to call the internal Terabox API for file details and the download link.
   * Failures are reported through the returned response rather than thrown.
   * @param shareLink The Terabox share link (e.g., https://www.terabox.app/s/...).
   * @returns A promise that resolves to a ScraperResponse containing FileInfo or an error.
   */
  public async getFileInfo(shareLink: string): Promise<ScraperResponse<FileInfo>> {
    const surl = this.extractShareKey(shareLink);
    if (!surl) {
      return this.createErrorResponse('E_INVALID_LINK', 'The provided link is not a valid Terabox share URL.');
    }

    logger.info({ surl }, `Starting file info retrieval for SURL: ${surl}`);

    try {
      // --- Step 1: Scrape the share page for hidden parameters ---
      const sharePageUrl = `https://${this.TERABOX_DOMAIN}/s/${surl}`;
      const htmlResponse = await this.httpClient.get(sharePageUrl, {
        headers: { 'Cookie': this.config.cookie }
      });

      const params = DataParser.extractShareParams(htmlResponse.data);
      if (!params) {
        return this.createErrorResponse('E_PARSING_FAIL', 'Failed to extract necessary parameters from the share page. Cookie might be invalid or page structure changed.');
      }

      const fsId = DataParser.extractFsId(params.fileList);
      if (!fsId) {
        return this.createErrorResponse('E_FSID_NOT_FOUND', 'Could not find a valid file ID (fs_id) in the extracted data.');
      }

      // We only need the first file's metadata for single file download
      const fileMetadata = params.fileList[0];

      // --- Step 2: Call the internal API for the direct download link ---
      // NOTE(review): channel/web/app_id are already present in API_INFO_URL, so
      // they are sent twice. Kept as-is to avoid changing the request on the wire.
      const apiParams = new URLSearchParams({
        shareid: params.shareId,
        uk: params.uk,
        sign: params.sign,
        timestamp: params.timestamp,
        fid_list: `[${fsId}]`,
        channel: 'share',
        web: '1',
        app_id: '250528',
        operation: 'download' // This is the key operation parameter
      }).toString();

      const apiResponse = await this.httpClient.get(`${this.API_INFO_URL}&${apiParams}`, {
        headers: { 'Cookie': this.config.cookie }
      });

      const apiData = apiResponse.data;
      if (apiData.errno !== 0) {
        return this.createErrorResponse('E_API_ERROR', `Terabox API returned an error: ${apiData.errmsg || 'Unknown API Error'}`, apiData);
      }

      const fileData = apiData.list?.[0];
      if (!fileData || !fileData.dlink) {
        return this.createErrorResponse('E_DLINK_NOT_FOUND', 'Direct download link (dlink) not found in the API response. File might be too large or restricted.');
      }

      const sizeInBytes = fileData.size || fileMetadata.size || 0;
      const fileInfo: FileInfo = {
        shareLink,
        downloadLink: fileData.dlink,
        fileName: fileData.server_filename || fileMetadata.server_filename || FALLBACK_FILE_NAME,
        fileSize: sizeInBytes,
        fileSizeHuman: bytesToHuman(sizeInBytes),
        thumbnailUrl: fileData.thumbs?.url1 || null,
        // Fall back to the id scraped from the share page when the API entry omits it.
        fsId: String(fileData.fs_id ?? fsId),
        shareTime: fileData.share_time || fileMetadata.share_time || 0,
      };

      logger.info({ fileName: fileInfo.fileName, size: fileInfo.fileSizeHuman }, 'Successfully retrieved file information and direct link.');
      return { success: true, data: fileInfo };
    } catch (e: unknown) {
      if (httpStatusOf(e) === 404) {
        return this.createErrorResponse('E_NOT_FOUND', 'The Terabox share link does not exist or has been removed.');
      }
      const message = describeError(e);
      return this.createErrorResponse('E_NETWORK_FAIL', `A network or unexpected error occurred: ${message}`, message || e);
    }
  }

  /**
   * Downloads a file using the FileInfo object.
   * Streams the response body to disk and reports progress after every chunk.
   * If the transfer fails after the destination file has been opened, the
   * partial file is removed. A file that already existed at the destination is
   * left alone when the failure happens before writing starts.
   * @param fileInfo The FileInfo object containing the downloadLink.
   * @param savePath The local path to save the file. Used as the full file path only
   * when it is absolute and has an extension; otherwise treated as a directory.
   * @param progressCallback Optional function to track download progress.
   * @returns A promise that resolves to a ScraperResponse containing DownloadResult or an error.
   */
  public async downloadFile(
    fileInfo: FileInfo,
    savePath: string,
    progressCallback?: ProgressCallback
  ): Promise<ScraperResponse<DownloadResult>> {
    if (!fileInfo.downloadLink) {
      return this.createErrorResponse('E_MISSING_DLINK', 'FileInfo object is missing the direct download link.');
    }

    const finalPath = this.resolveTargetPath(savePath, fileInfo.fileName);
    const dir = path.dirname(finalPath);

    // Set once the destination has been opened; gates partial-file cleanup.
    let writer: fs.WriteStream | undefined;

    try {
      // Ensure the directory exists
      await fs.promises.mkdir(dir, { recursive: true });

      logger.info({ fileName: fileInfo.fileName, finalPath }, `Starting download for ${fileInfo.fileName} to ${finalPath}`);

      const startTime = Date.now();

      // Avoid sending ".../s/null" when the stored share link cannot be parsed.
      const shareKey = this.extractShareKey(fileInfo.shareLink);
      const referer = shareKey
        ? `https://${this.TERABOX_DOMAIN}/s/${shareKey}`
        : `https://${this.TERABOX_DOMAIN}/`;

      const response = await this.httpClient.get(fileInfo.downloadLink, {
        responseType: 'stream',
        headers: {
          // Terabox might require the cookie again for the direct download link
          'Cookie': this.config.cookie,
          // Set a referrer to mimic a browser download
          'Referer': referer
        }
      });

      const totalSize = parseInt(response.headers['content-length'], 10) || fileInfo.fileSize;
      if (totalSize === 0) {
        logger.warn('Content-Length header is missing or zero. Using estimated file size.');
      }

      const source = response.data;
      const target = fs.createWriteStream(finalPath);
      writer = target;

      // Pipe the response stream to the file writer stream
      source.pipe(target);

      // Progress tracking state. Speed is sampled over windows of at least
      // SPEED_WINDOW_MS so a single chunk arriving right after a reset cannot
      // produce a wildly inflated figure.
      let downloadedBytes = 0;
      let windowStart = Date.now();
      let windowBytes = 0;
      let windowSampled = false;
      let speedBps = 0;

      source.on('data', (chunk: Buffer) => {
        downloadedBytes += chunk.length;
        if (!progressCallback) {
          return;
        }

        const now = Date.now();
        const windowMs = now - windowStart;
        if (windowMs >= SPEED_WINDOW_MS) {
          speedBps = ((downloadedBytes - windowBytes) * 1000) / windowMs;
          windowStart = now;
          windowBytes = downloadedBytes;
          windowSampled = true;
        } else if (!windowSampled && windowMs > 0) {
          // Until the first full window completes, report the running average.
          speedBps = (downloadedBytes * 1000) / windowMs;
        }

        // Clamp so an unknown or under-reported total never yields a negative
        // ETA or a percentage above 100.
        const remainingBytes = Math.max(0, totalSize - downloadedBytes);
        const progress: DownloadProgress = {
          totalSize,
          downloaded: downloadedBytes,
          percentage: totalSize > 0 ? Math.min(100, (downloadedBytes / totalSize) * 100) : 0,
          speedBps,
          etaSeconds: totalSize > 0 && speedBps > 0 ? remainingBytes / speedBps : Infinity,
        };
        progressCallback(progress);
      });

      // Wait for the download to finish
      await new Promise<void>((resolve, reject) => {
        target.on('finish', () => {
          const durationMs = Date.now() - startTime;
          logger.info({ durationMs }, `Download completed in ${durationMs / 1000}s.`);
          resolve();
        });
        target.on('error', (err) => {
          logger.error({ error: err }, 'File write stream error.');
          // Stop reading from the network once nothing can be written.
          source.destroy();
          reject(err);
        });
        source.on('error', (err: unknown) => {
          logger.error({ error: err }, 'Download stream error.');
          // pipe() does not close the destination on a source error.
          target.destroy();
          reject(err);
        });
      });

      const result: DownloadResult = {
        filePath: finalPath,
        durationMs: Date.now() - startTime,
      };
      return { success: true, data: result };
    } catch (e: unknown) {
      // Clean up the partially downloaded file, but only if this call opened it.
      if (writer) {
        writer.destroy();
        await fs.promises.unlink(finalPath).catch((unlinkErr: { code?: string }) => {
          // ENOENT means the file was never created, so there is nothing to report.
          if (unlinkErr?.code !== 'ENOENT') {
            logger.warn('Failed to delete partial file.');
          }
        });
      }
      const message = describeError(e);
      return this.createErrorResponse('E_DOWNLOAD_FAIL', `Download failed: ${message}`, message || e);
    }
  }
}