import React, { useState, useRef } from 'react';
import cx from 'classnames';
import Spin from '../../ui/Spin';
import { DocumentIcon } from '../../icons/Document';
import Modal from '../../ui/Modal';
import { useTranslation } from 'react-i18next';
import memoriApiClient from '@memori.ai/memori-api-client';

/*
 * NOTE(review): in the text this file was reviewed from, JSX elements and
 * several generic type arguments (e.g. on `React.FC`, `ReturnType`, `Promise`,
 * `useRef`, `React.ChangeEvent`) are not visible. They look like they were lost
 * in extraction rather than omitted by the author; confirm against the real
 * file. The code below is left exactly as found.
 */

// Types
/**
 * Shape of a document shown in the preview modal.
 * NOTE(review): no explicit reference to this type is visible in this view;
 * presumably it was the type argument of the `selectedFile` state. Confirm.
 */
type PreviewFile = {
  name: string;
  id: string;
  content: string;
  type: 'document';
  previewUrl?: string;
  uploaded?: boolean;
  error?: boolean;
};

// Constants
/** PDF.js release used to build both CDN URLs below, so library and worker stay in sync. */
const PDF_JS_VERSION = '3.11.174';
/** URL assigned to `pdfjsLib.GlobalWorkerOptions.workerSrc` once PDF.js has loaded. */
const WORKER_URL = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDF_JS_VERSION}/pdf.worker.min.js`;
/** URL of the PDF.js script injected on demand by `extractTextFromPDF`. */
const PDF_JS_URL = `https://cdnjs.cloudflare.com/ajax/libs/pdf.js/${PDF_JS_VERSION}/pdf.min.js`;
/** URL of the SheetJS script injected on demand by `extractTextFromXLSX`. */
const XLSX_URL =
  'https://cdn.sheetjs.com/xlsx-0.20.0/package/dist/xlsx.full.min.js';

// Add type definitions for external libraries
/* Both globals are populated at runtime by the injected CDN scripts, hence `any`. */
declare global {
  interface Window {
    pdfjsLib: any;
    XLSX: any;
  }
}

// Props interface
/** Props accepted by `UploadDocuments`. */
interface UploadDocumentsProps {
  /**
   * Called once per upload batch with the documents that were processed
   * successfully. It is not called when nothing was processed.
   */
  setDocumentPreviewFiles: (
    files: {
      name: string;
      id: string;
      content: string;
      mimeType: string;
      textAssetUrl?: string;
    }[]
  ) => void;
  /** When truthy (and `backend.uploadAsset` exists) the authenticated upload endpoint is used. */
  authToken?: string;
  /** API client; only its `backend` member is read. Asset upload throws when it is missing. */
  client?: ReturnType;
  /** Used together with `memoriID` for the unauthenticated upload endpoint. */
  sessionID?: string;
  /** Used together with `sessionID` for the unauthenticated upload endpoint. */
  memoriID?: string;
  /** Maximum number of documents; a falsy value (undefined or 0) disables the limit. */
  maxDocuments?: number;
  /** Documents already attached; only its `length` is read in the visible code. */
  documentPreviewFiles: any;
  /** Notified when loading starts/stops and whenever the count of files still in flight drops. */
  onLoadingChange?: (loading: boolean, fileCount?: number) => void;
  /** Receives user-facing warnings (skipped files, processing or upload failures). */
  onDocumentError?: (error: {
    message: string;
    severity: 'error' | 'warning' | 'info';
  }) => void;
  /** Optional per-file gate; returning false skips the file. */
  onValidateFile?: (file: File) => boolean;
  /**
   * Optional payload-size gate.
   * NOTE(review): only reached through `validatePayloadSize`, which is not
   * called anywhere in the visible code.
   */
  onValidatePayloadSize?: (
    newDocuments: {
      name: string;
      id: string;
      content: string;
      mimeType: string;
    }[]
  ) => boolean | { valid: boolean; message?: string };
}

/**
 * Document upload control.
 *
 * For each selected file it extracts text in the browser (PDF via PDF.js,
 * XLSX via SheetJS, txt/md/json/csv/html read directly), uploads the extracted
 * text as a `.txt` asset, and hands the processed documents to
 * `setDocumentPreviewFiles`.
 */
const UploadDocuments: React.FC = ({
  setDocumentPreviewFiles,
  authToken = '',
  client,
  sessionID = '',
  memoriID = '',
  maxDocuments,
  documentPreviewFiles,
  onLoadingChange,
  onDocumentError,
  onValidateFile,
  onValidatePayloadSize,
}) => {
  const { t } = useTranslation();
  // Without a client, fall back to a stub whose upload functions are null so
  // the `backend?.uploadAsset` checks below are simply falsy.
  const { backend } = client || {
    backend: { uploadAsset: null, uploadAssetUnlogged: null },
  };

  // State
  const [isLoading, setIsLoading] = useState(false);
  // Document currently shown in the preview modal (null = modal content cleared).
  const [selectedFile, setSelectedFile] = useState(null);

  // Refs
  // Reset to '' after each batch, which suggests it points at the file input.
  const documentInputRef = useRef(null);

  /** Updates local loading state and mirrors it to `onLoadingChange`. */
  const setLoadingState = (loading: boolean, fileCount?: number) => {
    setIsLoading(loading);
    onLoadingChange?.(loading, fileCount);
  };

  // Document upload
  /** Delegates to `onValidateFile` when provided; otherwise every file is accepted. */
  const validateDocumentFile = (file: File): boolean => {
    if (onValidateFile) {
      return onValidateFile(file);
    }
    return true;
  };

  // Validate total payload size (returns result object for conditional error display)
  /**
   * Normalises the result of `onValidatePayloadSize` to `{ valid, message? }`.
   * A boolean `false` becomes `{ valid: false, message: '' }`; with no callback
   * the payload is considered valid.
   *
   * NOTE(review): this helper is not called anywhere in the visible code, so
   * the `onValidatePayloadSize` prop currently has no effect unless it is
   * used in a part of the file not visible here. Confirm.
   */
  const validatePayloadSize = (
    newDocuments: {
      name: string;
      id: string;
      content: string;
      mimeType: string;
    }[]
  ): { valid: boolean; message?: string } => {
    if (onValidatePayloadSize) {
      const result = onValidatePayloadSize(newDocuments);
      if (typeof result === 'boolean') {
        return result ? { valid: true } : { valid: false, message: '' };
      }
      return result;
    }
    return { valid: true };
  };

  /**
   * Extracts the text of every page of a PDF file.
   *
   * Loads PDF.js from the CDN on first use. Text items of a page are joined
   * with a single space and each page is terminated with '\n'.
   *
   * @throws Error prefixed with "PDF extraction failed:" on any failure.
   */
  const extractTextFromPDF = async (file: File): Promise => {
    try {
      // Load PDF.js if not already loaded
      if (!window.pdfjsLib) {
        await new Promise((resolve, reject) => {
          const script = document.createElement('script');
          script.src = PDF_JS_URL;
          script.onload = () => {
            window.pdfjsLib.GlobalWorkerOptions.workerSrc = WORKER_URL;
            resolve(true);
          };
          // NOTE(review): `reject` receives whatever the browser passes to
          // onerror (presumably a DOM Event, not an Error), so the catch below
          // would report 'Unknown error' for a failed script load.
          script.onerror = reject;
          document.head.appendChild(script);
        });
      }

      // Extract text from PDF
      const arrayBuffer = await file.arrayBuffer();
      const pdf = await window.pdfjsLib.getDocument({ data: arrayBuffer })
        .promise;
      let text = '';

      // Iterate through each page and extract text
      // (page numbers passed to getPage start at 1)
      for (let i = 1; i <= pdf.numPages; i++) {
        const page = await pdf.getPage(i);
        const content = await page.getTextContent();
        const pageText = content.items
          // keep only items carrying a non-empty string `str`
          .filter((item: any) => item.str && typeof item.str === 'string')
          .map((item: any) => item.str)
          .join(' ');
        text += pageText + '\n';
      }

      return text;
    } catch (error) {
      console.error('PDF extraction failed:', error);
      throw new Error(
        `PDF extraction failed: ${
          error instanceof Error ? error.message : 'Unknown error'
        }`
      );
    }
  };

  /**
   * Renders every sheet of an XLSX workbook as a plain-text table.
   *
   * Loads SheetJS from the CDN on first use. Each sheet is emitted as
   * "Sheet: <name>", followed by its rows with cells padded to the column
   * width + 2 and joined by '|', with a dashed separator inserted after the
   * first row.
   *
   * @throws Error prefixed with "XLSX extraction failed:" on any failure.
   */
  const extractTextFromXLSX = async (file: File): Promise => {
    try {
      if (!window.XLSX) {
        await new Promise((resolve, reject) => {
          const script = document.createElement('script');
          script.src = XLSX_URL;
          script.onload = resolve;
          // NOTE(review): same as the PDF.js loader, this rejects with the
          // onerror argument rather than an Error.
          script.onerror = reject;
          document.head.appendChild(script);
        });
      }

      const arrayBuffer = await file.arrayBuffer();
      const workbook = window.XLSX.read(arrayBuffer, {
        type: 'array',
        cellFormula: true,
        cellNF: true,
        cellText: true,
        cellDates: true,
      });

      let text = '';
      for (const sheetName of workbook.SheetNames) {
        const worksheet = workbook.Sheets[sheetName];
        // header: 1 is used so `data` is an array of row arrays, which the
        // reduce/map below rely on.
        const data = window.XLSX.utils.sheet_to_json(worksheet, {
          header: 1,
          raw: false,
        });

        // Widest stringified cell per column index, across all rows.
        const colWidths = data.reduce((widths: number[], row: any[]) => {
          row.forEach((cell, i) => {
            // NOTE(review): `cell || ''` turns any falsy cell value into ''.
            const cellWidth = (cell || '').toString().length;
            widths[i] = Math.max(widths[i] || 0, cellWidth);
          });
          return widths;
        }, []);

        const formattedText = data.map((row: any[]) => {
          return row
            .map((cell, i) => {
              const cellStr = (cell || '').toString();
              return cellStr.padEnd(colWidths[i] + 2);
            })
            .join('|')
            .trim();
        });

        if (formattedText.length > 0) {
          const separator = colWidths
            .map((w: number) => '-'.repeat(w + 2))
            .join('+');
          // Insert the rule at index 1, i.e. directly below the first row.
          formattedText.splice(1, 0, separator);
        }

        text += `Sheet: ${sheetName}\n${formattedText.join('\n')}\n\n`;
      }

      return text;
    } catch (error) {
      console.error('XLSX extraction failed:', error);
      throw new Error(
        `XLSX extraction failed: ${
          error instanceof Error ? error.message : 'Unknown error'
        }`
      );
    }
  };

  /**
   * Dispatches on the lower-cased file extension and returns the extracted
   * text, or `{ text: null }` when the extension is not handled.
   *
   * @throws Error `Failed to process "<name>": <reason>` when extraction fails.
   */
  const processDocumentFile = async (
    file: File
  ): Promise<{ text: string | null }> => {
    // For a name without a dot, this yields the whole lower-cased name.
    const fileExt = file.name.split('.').pop()?.toLowerCase() || '';
    try {
      let text: string | null = null;
      if (fileExt === 'pdf') {
        text = await extractTextFromPDF(file);
      } else if (['txt', 'md', 'json', 'csv', 'html'].includes(fileExt)) {
        text = await file.text();
      } else if (fileExt === 'xlsx') {
        text = await extractTextFromXLSX(file);
      }
      return { text };
    } catch (error) {
      console.error('Document processing failed:', error);
      throw new Error(
        `Failed to process "${file.name}": ${
          error instanceof Error ? error.message : 'Unknown error'
        }`
      );
    }
  };

  /** Reads a file as a data URL; resolves to '' when the reader yields no result. */
  const fileToDataUrl = (file: File): Promise =>
    new Promise((resolve, reject) => {
      const reader = new FileReader();
      reader.onload = e => resolve((e.target?.result as string) || '');
      reader.onerror = () => reject(new Error('File reading failed'));
      reader.readAsDataURL(file);
    });

  /**
   * Uploads a file as an asset and returns `response.asset?.assetURL`.
   *
   * Uses `backend.uploadAsset` when an auth token is available; otherwise
   * falls back to `backend.uploadAssetUnlogged` with `memoriID`/`sessionID`.
   *
   * @throws Error when the client is missing, when neither upload path is
   *   usable, when the response is empty, or when `resultCode` is not 0.
   */
  const uploadAssetFile = async (file: File): Promise => {
    if (!client) {
      throw new Error('API client not configured properly for media upload');
    }
    const fileDataUrl = await fileToDataUrl(file);
    let response: any;
    if (authToken && backend?.uploadAsset) {
      response = await backend.uploadAsset(file.name, fileDataUrl, authToken);
    } else if (memoriID && sessionID && backend?.uploadAssetUnlogged) {
      response = await backend.uploadAssetUnlogged(
        file.name,
        fileDataUrl,
        memoriID,
        sessionID
      );
    } else {
      throw new Error('Missing required parameters for upload');
    }
    if (!response) {
      throw new Error('Upload failed');
    }
    if (response.resultCode !== 0) {
      throw new Error(response.resultMessage || 'Upload failed');
    }
    return response.asset?.assetURL;
  };

  /**
   * Change handler for the file input.
   *
   * Caps the selection at the remaining document slots, processes the files
   * one at a time, reports per-file failures through `onDocumentError`, and
   * finally passes the successfully processed documents to
   * `setDocumentPreviewFiles`. The input value and loading state are always
   * reset when the batch ends.
   */
  const handleDocumentUpload = async (
    e: React.ChangeEvent
  ) => {
    const files = Array.from(e.target.files || []);
    if (files.length === 0) return;

    // Check current total media count (images + documents)
    // NOTE(review): only `documentPreviewFiles.length` is read here; whether
    // that collection also includes images cannot be told from this file.
    const currentMediaCount = documentPreviewFiles.length;
    // A falsy maxDocuments (undefined or 0) means "no limit": every file is kept.
    const remainingSlots = maxDocuments
      ? Math.max(0, maxDocuments - currentMediaCount)
      : files.length;
    const filesToProcess = files.slice(0, remainingSlots);
    if (files.length > filesToProcess.length) {
      const skipped = files.length - filesToProcess.length;
      // NOTE(review): this branch only runs when maxDocuments is truthy, so
      // the `?? 10` fallbacks below look unreachable.
      onDocumentError?.({
        message:
          t('upload.documentsNotAddedMaxAllowed', {
            count: skipped,
            max: maxDocuments ?? 10,
            defaultValue: `${skipped} document(s) not added (maximum ${
              maxDocuments ?? 10
            } files allowed).`,
          }) ??
          `${skipped} document(s) not added (maximum ${
            maxDocuments ?? 10
          } files allowed).`,
        severity: 'warning',
      });
    }

    if (filesToProcess.length === 0) {
      // Clear the input's value before leaving (presumably so the same
      // selection can fire a change event again).
      if (documentInputRef.current) {
        documentInputRef.current.value = '';
      }
      return;
    }

    setLoadingState(true, filesToProcess.length);

    try {
      // Process each file
      const processedFiles: {
        name: string;
        id: string;
        content: string;
        mimeType: string;
        textAssetUrl?: string;
      }[] = [];
      // Number of files still expected to produce a document; reported through
      // onLoadingChange each time a file is dropped.
      let activeCount = filesToProcess.length;

      for (const file of filesToProcess) {
        if (!validateDocumentFile(file)) {
          activeCount--;
          onLoadingChange?.(true, activeCount);
          continue;
        }

        // Short random id (up to 9 base-36 characters).
        // NOTE(review): `substr` is a legacy method; `slice(2, 11)` is the
        // equivalent.
        const fileId = Math.random().toString(36).substr(2, 9);

        try {
          const { text } = await processDocumentFile(file);

          if (text) {
            // Strip the last extension; fall back to the full name if that
            // leaves nothing (e.g. ".env").
            const baseName = file.name.replace(/\.[^/.]+$/, '') || file.name;
            const textFile = new File([text], `${baseName}.txt`, {
              type: 'text/plain',
            });
            let textAssetUrl: string | undefined;
            try {
              textAssetUrl = await uploadAssetFile(textFile);
            } catch (uploadError) {
              // Best effort: a failed asset upload only produces a warning;
              // the document is still added without `textAssetUrl`.
              console.error('Text asset upload failed:', uploadError);
              onDocumentError?.({
                message: t('upload.partialAssetUploadWarning', {
                  fileName: file.name,
                  defaultValue:
                    'Some file links could not be uploaded, but the document was added anyway.',
                }),
                severity: 'warning',
              });
            }

            processedFiles.push({
              name: file.name,
              id: fileId,
              content: text,
              mimeType: file.type,
              textAssetUrl,
            });
          } else {
            // Unsupported extension (null) or empty text: the file is dropped
            // without calling onDocumentError.
            activeCount--;
            onLoadingChange?.(true, activeCount);
          }
        } catch (error) {
          activeCount--;
          onLoadingChange?.(true, activeCount);
          console.error('File processing error:', error);
          onDocumentError?.({
            message: `${
              error instanceof Error ? error.message : 'Unknown error'
            }`,
            severity: 'warning',
          });
        }
      }

      if (processedFiles.length > 0) {
        // Only the files from this batch are passed on; presumably the caller
        // merges them with the existing documents. Verify against caller.
        setDocumentPreviewFiles(
          processedFiles.map(file => ({
            ...file,
            type: 'document',
          }))
        );
      }
    } finally {
      setLoadingState(false);
      if (documentInputRef.current) {
        documentInputRef.current.value = '';
      }
    }
  };

  /*
   * NOTE(review): the JSX elements of the render output (file input, upload
   * button, modal) are not visible in this view; only the original JSX
   * comments and fragments of the modal props remain. Left untouched.
   */
  return (
    {/* Hidden file input */} {/* Upload document button */} {/* Modal */} setSelectedFile(null)} closable title={selectedFile?.name} >
    {selectedFile?.content}
  );
};

export default UploadDocuments;