import React, { useState, useRef, useEffect } from 'react';
import cx from 'classnames';
import Spin from '../../ui/Spin';
import { DocumentIcon } from '../../icons/Document';
import Modal from '../../ui/Modal';
import { useTranslation } from 'react-i18next';

// Types

/** Document currently selected for display in the preview modal. */
type PreviewFile = {
  name: string;
  id: string;
  content: string;
  type: 'document';
  previewUrl?: string;
  uploaded?: boolean;
  error?: boolean;
};

/** Extracted document as handed to the parent via `setDocumentPreviewFiles`. */
type ProcessedDocument = {
  name: string;
  id: string;
  content: string;
  mimeType: string;
};

// Constants
const PDF_JS_VERSION = '3.11.174';
const WORKER_URL = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDF_JS_VERSION}/pdf.worker.min.js`;
const PDF_JS_URL = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDF_JS_VERSION}/pdf.min.js`;
const XLSX_URL =
  'https://cdn.sheetjs.com/xlsx-0.20.0/package/dist/xlsx.full.min.js';

/** Extensions whose content is read as plain text without any parsing. */
const PLAIN_TEXT_EXTENSIONS = ['txt', 'md', 'json', 'csv', 'html'];

// Add type definitions for external libraries
declare global {
  interface Window {
    pdfjsLib: any;
    XLSX: any;
  }
}

// Props interface
interface UploadDocumentsProps {
  /** Receives the full list of documents after an upload. */
  setDocumentPreviewFiles: (files: ProcessedDocument[]) => void;
  /** Maximum number of files; when omitted (or 0) no limit is applied. */
  maxDocuments?: number;
  documentPreviewFiles: any;
  /** Called whenever the internal loading flag changes. */
  onLoadingChange?: (loading: boolean) => void;
  /** Called with a user-facing message when a file is skipped or fails. */
  onDocumentError?: (error: {
    message: string;
    severity: 'error' | 'warning' | 'info';
  }) => void;
  /** Optional per-file gate; files for which it returns false are skipped. */
  onValidateFile?: (file: File) => boolean;
  /**
   * Optional payload gate for newly extracted documents. May return a plain
   * boolean or a result object carrying a message to display.
   */
  onValidatePayloadSize?: (
    newDocuments: ProcessedDocument[]
  ) => boolean | { valid: boolean; message?: string };
  /** Per-document content character limit. Defaults to 300000. */
  maxDocumentContentLength?: number;
}

/** Returns the message of a thrown value, or 'Unknown error' for non-Errors. */
const describeError = (error: unknown): string =>
  error instanceof Error ? error.message : 'Unknown error';

/**
 * Appends a script tag for `src` to the document head and resolves once it
 * has loaded.
 *
 * @throws Error when the browser reports a load failure. The failed tag is
 *   removed so a later retry does not leave duplicates behind.
 */
const loadScript = (src: string): Promise<void> =>
  new Promise<void>((resolve, reject) => {
    const script = document.createElement('script');
    script.src = src;
    script.onload = () => resolve();
    script.onerror = () => {
      script.remove();
      // The raw error event carries no message; reject with a real Error so
      // callers can report something more useful than "Unknown error".
      reject(new Error(`Failed to load script: ${src}`));
    };
    document.head.appendChild(script);
  });

/**
 * Extracts the text of every page of a PDF, one line per page.
 * PDF.js is loaded from the CDN on first use.
 *
 * @throws Error prefixed with "PDF extraction failed:" on any failure.
 */
const extractTextFromPDF = async (file: File): Promise<string> => {
  try {
    // Load PDF.js if not already loaded
    if (!window.pdfjsLib) {
      await loadScript(PDF_JS_URL);
      // Configure the worker after the awaited load (not inside the onload
      // handler) so a missing global rejects instead of hanging forever.
      if (!window.pdfjsLib) {
        throw new Error('PDF.js did not initialise');
      }
      window.pdfjsLib.GlobalWorkerOptions.workerSrc = WORKER_URL;
    }

    const arrayBuffer = await file.arrayBuffer();
    const pdf = await window.pdfjsLib.getDocument({ data: arrayBuffer })
      .promise;

    let text = '';
    // Pages are 1-indexed in PDF.js
    for (let pageNumber = 1; pageNumber <= pdf.numPages; pageNumber++) {
      const page = await pdf.getPage(pageNumber);
      const content = await page.getTextContent();
      const pageText = content.items
        .filter(
          (item: { str?: unknown }) =>
            typeof item.str === 'string' && item.str !== ''
        )
        .map((item: { str: string }) => item.str)
        .join(' ');
      text += pageText + '\n';
    }
    return text;
  } catch (error) {
    console.error('PDF extraction failed:', error);
    throw new Error(`PDF extraction failed: ${describeError(error)}`);
  }
};

/**
 * Renders every sheet of an XLSX workbook as a padded, pipe-separated text
 * table preceded by a "Sheet: <name>" line. SheetJS is loaded from the CDN on
 * first use.
 *
 * @throws Error prefixed with "XLSX extraction failed:" on any failure.
 */
const extractTextFromXLSX = async (file: File): Promise<string> => {
  try {
    if (!window.XLSX) {
      await loadScript(XLSX_URL);
    }

    const arrayBuffer = await file.arrayBuffer();
    const workbook = window.XLSX.read(arrayBuffer, {
      type: 'array',
      cellFormula: true,
      cellNF: true,
      cellText: true,
      cellDates: true,
    });

    let text = '';
    for (const sheetName of workbook.SheetNames) {
      const worksheet = workbook.Sheets[sheetName];
      const rawRows: unknown[][] = window.XLSX.utils.sheet_to_json(worksheet, {
        header: 1,
        raw: false,
      });

      // Array.from turns holes into undefined so empty cells are padded like
      // any other cell; `??` keeps legitimate falsy values such as 0 / false.
      const rows = rawRows.map(row =>
        Array.from(row, cell => String(cell ?? ''))
      );

      const colWidths: number[] = [];
      for (const row of rows) {
        row.forEach((cell, col) => {
          colWidths[col] = Math.max(colWidths[col] ?? 0, cell.length);
        });
      }

      const formattedText = rows.map(row =>
        row
          .map((cell, col) => cell.padEnd((colWidths[col] ?? 0) + 2))
          .join('|')
          // Only trailing padding is dropped; leading padding keeps rows
          // with an empty first cell aligned with the others.
          .trimEnd()
      );

      if (formattedText.length > 0) {
        const separator = colWidths.map(w => '-'.repeat(w + 2)).join('+');
        // Rule goes right under the first row
        formattedText.splice(1, 0, separator);
      }

      text += `Sheet: ${sheetName}\n${formattedText.join('\n')}\n\n`;
    }
    return text;
  } catch (error) {
    console.error('XLSX extraction failed:', error);
    throw new Error(`XLSX extraction failed: ${describeError(error)}`);
  }
};

/**
 * Upload control that extracts text from the selected files (PDF, XLSX and
 * plain-text formats) in the browser and passes the result to the parent.
 */
const UploadDocuments: React.FC<UploadDocumentsProps> = ({
  setDocumentPreviewFiles,
  maxDocuments,
  documentPreviewFiles,
  onLoadingChange,
  onDocumentError,
  onValidateFile,
  onValidatePayloadSize,
  maxDocumentContentLength = 300000,
}) => {
  const { t } = useTranslation();

  // State
  const [isLoading, setIsLoading] = useState(false);
  const [selectedFile, setSelectedFile] = useState<PreviewFile | null>(null);

  // Refs
  const documentInputRef = useRef<HTMLInputElement>(null);

  // Update loading state in parent component
  useEffect(() => {
    if (onLoadingChange) {
      onLoadingChange(isLoading);
    }
  }, [isLoading, onLoadingChange]);

  /** Clears the file input so selecting the same file again fires onChange. */
  const resetDocumentInput = (): void => {
    if (documentInputRef.current) {
      documentInputRef.current.value = '';
    }
  };

  /** Delegates to `onValidateFile`; every file is accepted when it is absent. */
  const validateDocumentFile = (file: File): boolean =>
    onValidateFile ? onValidateFile(file) : true;

  /**
   * Normalises the result of `onValidatePayloadSize` to a result object.
   * A bare `false` becomes `{ valid: false, message: '' }`, i.e. nothing for
   * this component to display.
   */
  const validatePayloadSize = (
    newDocuments: ProcessedDocument[]
  ): { valid: boolean; message?: string } => {
    if (!onValidatePayloadSize) {
      return { valid: true };
    }
    const result = onValidatePayloadSize(newDocuments);
    if (typeof result === 'boolean') {
      return result ? { valid: true } : { valid: false, message: '' };
    }
    return result;
  };

  /**
   * Extracts text from one file according to its extension and truncates it
   * to `maxDocumentContentLength` characters (appending a truncation marker).
   *
   * @returns `text` is null for extensions that are not handled.
   * @throws Error of the form `Failed to process "<name>": <reason>`.
   */
  const processDocumentFile = async (
    file: File
  ): Promise<{ text: string | null; wasTruncated: boolean }> => {
    const fileExt = file.name.split('.').pop()?.toLowerCase() || '';

    try {
      let text: string | null = null;
      if (fileExt === 'pdf') {
        text = await extractTextFromPDF(file);
      } else if (PLAIN_TEXT_EXTENSIONS.includes(fileExt)) {
        text = await file.text();
      } else if (fileExt === 'xlsx') {
        text = await extractTextFromXLSX(file);
      }

      const perDocumentLimit = maxDocumentContentLength;
      let wasTruncated = false;
      if (text && text.length > perDocumentLimit) {
        console.warn(
          'Document content exceeds length limit:',
          text.length,
          '>',
          perDocumentLimit
        );
        wasTruncated = true;
        text =
          text.substring(0, perDocumentLimit) +
          '\n\n[Content truncated due to size limits]';
      }
      return { text, wasTruncated };
    } catch (error) {
      console.error('Document processing failed:', error);
      throw new Error(
        `Failed to process "${file.name}": ${describeError(error)}`
      );
    }
  };

  /**
   * Change handler of the file input: enforces `maxDocuments`, extracts each
   * accepted file, and appends the results to the existing documents.
   * Per-file failures are reported through `onDocumentError` and do not stop
   * the remaining files.
   */
  const handleDocumentUpload = async (
    e: React.ChangeEvent<HTMLInputElement>
  ) => {
    const files = Array.from(e.target.files || []);
    if (files.length === 0) return;

    // Check current total media count (images + documents)
    const currentMediaCount = documentPreviewFiles.length;
    const remainingSlots = maxDocuments
      ? Math.max(0, maxDocuments - currentMediaCount)
      : files.length;
    const filesToProcess = files.slice(0, remainingSlots);

    if (files.length > filesToProcess.length) {
      const skipped = files.length - filesToProcess.length;
      const fallbackMessage = `${skipped} document(s) not added (maximum ${
        maxDocuments ?? 10
      } files allowed).`;
      onDocumentError?.({
        message:
          t('upload.documentsNotAddedMaxAllowed', {
            count: skipped,
            max: maxDocuments ?? 10,
            defaultValue: fallbackMessage,
          }) ?? fallbackMessage,
        severity: 'warning',
      });
    }

    if (filesToProcess.length === 0) {
      resetDocumentInput();
      return;
    }

    setIsLoading(true);
    try {
      const processedFiles: ProcessedDocument[] = [];

      for (const file of filesToProcess) {
        try {
          if (!validateDocumentFile(file)) {
            continue;
          }

          // Truncation is already marked inside the returned text itself.
          const { text } = await processDocumentFile(file);
          if (!text) {
            continue;
          }

          const candidate: ProcessedDocument = {
            name: file.name,
            id: Math.random().toString(36).slice(2, 11),
            content: text,
            mimeType: file.type,
          };

          // Only keep documents that still fit within the payload.
          const payloadCheck = validatePayloadSize([
            ...processedFiles,
            candidate,
          ]);
          if (!payloadCheck.valid) {
            if (payloadCheck.message) {
              onDocumentError?.({
                message: payloadCheck.message,
                severity: 'warning',
              });
            }
            continue;
          }

          processedFiles.push(candidate);
        } catch (error) {
          console.error('File processing error:', error);
          onDocumentError?.({
            message: `${describeError(error)}`,
            severity: 'warning',
          });
        }
      }

      // Add new documents to existing ones (only those that fit within payload)
      if (processedFiles.length > 0) {
        // NOTE(review): existing entries whose type is not 'document' are not
        // carried over into the new list - confirm this is intended.
        const existingDocuments = documentPreviewFiles.filter(
          (file: any) => file.type === 'document'
        );
        setDocumentPreviewFiles([
          ...existingDocuments,
          ...processedFiles.map(file => ({
            ...file,
            type: 'document',
          })),
        ]);
      }
    } finally {
      // Always leave the loading state and clear the input, even when a
      // callback throws, so the control never gets stuck.
      setIsLoading(false);
      resetDocumentInput();
    }
  };

  // NOTE(review): the markup below appears to have lost its element tags
  // during extraction (only the comments, the trailing Modal props and the
  // Modal child expression remain). It is kept verbatim rather than guessed;
  // restore the original elements before compiling.
  return (
    {/* Hidden file input */} {/* Upload document button */} {/* Modal */} setSelectedFile(null)} closable title={selectedFile?.name} >
    {selectedFile?.content}
  );
};

export default UploadDocuments;