///
import { spawn } from "child_process";
import { promises as fs } from "fs";
import os from "os";
import path from "path";
import { AdapterOutput } from "../pipeline";
import { ZipArchive } from "../lib/zip";
/** An `.rtf` file being considered as the primary document of an RTFD bundle. */
type RtfCandidate = {
/** Path of the `.rtf` file. */
filePath: string;
/** File size as reported by `fs.stat`; the larger file wins when priorities tie. */
size: number;
/** Name-based score from `scoreRtfCandidate`; a higher value is preferred. */
priority: number;
};
/**
 * Recursively collects the paths of all regular files beneath `root`.
 *
 * Directories are descended into depth-first, in the order `readdir` reports
 * them. Entries that are neither directories nor regular files are skipped.
 *
 * @param root - Directory to walk.
 * @returns Paths of every regular file found, each built by joining onto `root`.
 * @throws If `root` or any nested directory cannot be read.
 */
async function listFiles(root: string): Promise<string[]> {
  const entries = await fs.readdir(root, { withFileTypes: true });
  const files: string[] = [];
  for (const entry of entries) {
    const fullPath = path.join(root, entry.name);
    if (entry.isDirectory()) {
      // Append one by one rather than `push(...nested)`: spreading a very large
      // array into call arguments can exceed the engine's argument limit.
      for (const nested of await listFiles(fullPath)) {
        files.push(nested);
      }
    } else if (entry.isFile()) {
      files.push(fullPath);
    }
  }
  return files;
}
/**
 * Ranks an `.rtf` path by its base name (case-insensitively).
 *
 * @param filePath - Path of the candidate file; only its base name is inspected.
 * @returns `3` for exactly `txt.rtf` or `text.rtf`, `2` for any other name that
 *   starts with `txt` and ends with `.rtf`, and `1` for everything else.
 */
function scoreRtfCandidate(filePath: string): number {
  const baseName = path.basename(filePath).toLowerCase();

  const isCanonicalName = baseName === "txt.rtf" || baseName === "text.rtf";
  if (isCanonicalName) {
    return 3;
  }

  const isTxtVariant = baseName.startsWith("txt") && baseName.endsWith(".rtf");
  return isTxtVariant ? 2 : 1;
}
/**
 * Picks the most likely main `.rtf` document inside an RTFD bundle directory.
 *
 * Every file below `extractedPath` whose extension is `.rtf` (case-insensitive)
 * is a candidate. Candidates are ranked by `scoreRtfCandidate` first and by
 * file size second, both descending. Ties keep the order in which the files
 * were listed.
 *
 * @param extractedPath - Root directory of the bundle contents.
 * @returns The best candidate, or `null` when the tree contains no `.rtf` file.
 * @throws If the directory cannot be listed or a candidate cannot be stat'ed.
 */
async function findPrimaryRtf(extractedPath: string): Promise<RtfCandidate | null> {
  const files = await listFiles(extractedPath);
  const rtfFiles = files.filter((file) => path.extname(file).toLowerCase() === ".rtf");
  if (rtfFiles.length === 0) {
    return null;
  }

  // The stat calls are independent; Promise.all keeps the results in input order.
  const candidates: RtfCandidate[] = await Promise.all(
    rtfFiles.map(async (filePath) => {
      const stats = await fs.stat(filePath);
      return { filePath, size: stats.size, priority: scoreRtfCandidate(filePath) };
    }),
  );

  candidates.sort((a, b) => {
    if (a.priority !== b.priority) {
      return b.priority - a.priority;
    }
    return b.size - a.size;
  });
  return candidates[0] ?? null;
}
/**
 * Last-resort RTF to plain-text conversion that needs no external tooling.
 *
 * This is a crude token stripper, not an RTF parser:
 * - `\par` becomes a newline. Longer control words that merely start with
 *   `par` (`\pard`, `\pardirnatural`, `\partightenfactor0`, ...) are removed
 *   like any other control word instead of leaving their tail in the text.
 * - The escaped literals `\\`, `\{` and `\}` are unescaped.
 * - Hex escapes (`\'hh`) are dropped rather than decoded, because the
 *   document code page is not tracked.
 * - Every other control word is removed, together with its optional (possibly
 *   negative) numeric parameter and one delimiting space.
 * - Group braces are removed, runs of 3+ newlines collapse to a blank line,
 *   and the result is trimmed.
 *
 * @param rtf - Raw RTF source.
 * @returns Best-effort plain text.
 */
function convertRtfFallback(rtf: string): string {
  // A single left-to-right scan: removing one token can never splice its
  // neighbours together into a new, bogus control word (e.g. `\b\'e9t`).
  // Group 1: escaped literal; group 2: paragraph break.
  const token = /\\([\\{}])|(\\par(?![a-zA-Z]) ?)|\\'[0-9a-fA-F]{2}|\\[a-zA-Z]+-?\d* ?|[{}]/g;

  return rtf
    .replace(/\r\n/g, "\n")
    .replace(token, (_match: string, literal?: string, paragraph?: string) => {
      if (literal !== undefined) {
        return literal;
      }
      return paragraph !== undefined ? "\n" : "";
    })
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
/**
 * Tries to convert RTF to text with the optional `rtf2text` package.
 *
 * The package is loaded lazily, so its absence is not fatal. Two module shapes
 * are supported: a callable export (`converter(rtf)`, sync or async) and an
 * object exposing a Node-style `fromString(rtf, callback)` method.
 *
 * This is deliberately best-effort: any failure (module missing, converter
 * throwing, unexpected export shape) yields `null` so the caller can move on
 * to the next conversion strategy.
 *
 * @param rtf - Raw RTF source.
 * @returns The converted text, or `null` when conversion is unavailable, fails,
 *   or produces only whitespace.
 */
async function convertWithNode(rtf: string): Promise<string | null> {
  type CallbackConverter = {
    fromString(input: string, cb: (err: Error | null, text?: string) => void): void;
  };

  try {
    const loaded: unknown = await import("rtf2text");
    // Works for both CommonJS-style (`module.exports = fn`) and default exports.
    const converter: unknown = (loaded as { default?: unknown }).default ?? loaded;

    if (typeof converter === "function") {
      // `await` already accepts plain values as well as promises.
      const result: unknown = await (converter as (input: string) => unknown)(rtf);
      return typeof result === "string" && result.trim() ? result : null;
    }

    if (converter && typeof (converter as { fromString?: unknown }).fromString === "function") {
      const result = await new Promise<string>((resolve, reject) => {
        (converter as CallbackConverter).fromString(rtf, (error, text) => {
          if (error) {
            reject(error);
          } else {
            resolve(text ?? "");
          }
        });
      });
      return result.trim() ? result : null;
    }
  } catch {
    // Best-effort strategy: fall through to the next converter.
    return null;
  }
  return null;
}
/**
 * Runs an external command and captures its standard output as UTF-8 text.
 *
 * @param command - Executable to spawn (no shell is involved).
 * @param args - Arguments passed to the executable verbatim.
 * @param input - Optional text written to the child's stdin before it is closed.
 * @returns Everything the child wrote to stdout, once it exits with code 0.
 * @throws Rejects with the spawn error if the process cannot be started, or
 *   with an `Error` carrying stderr (or a generic exit message when stderr is
 *   empty) if it exits unsuccessfully.
 */
function runCommand(command: string, args: string[], input?: string): Promise<string> {
  return new Promise<string>((resolve, reject) => {
    const child = spawn(command, args);
    let stdout = "";
    let stderr = "";

    // Decode at the stream level so a multi-byte UTF-8 character that happens
    // to be split across two chunks is reassembled instead of being corrupted.
    child.stdout.setEncoding("utf8");
    child.stderr.setEncoding("utf8");
    child.stdout.on("data", (chunk: string) => {
      stdout += chunk;
    });
    child.stderr.on("data", (chunk: string) => {
      stderr += chunk;
    });

    child.on("error", (error) => {
      reject(error);
    });
    child.on("close", (code, signal) => {
      if (code === 0) {
        resolve(stdout);
        return;
      }
      const fallbackMessage =
        code === null
          ? `${command} was terminated by signal ${signal}`
          : `${command} exited with code ${code}`;
      reject(new Error(stderr || fallbackMessage));
    });

    // A child that exits without reading stdin makes the pipe emit EPIPE; with
    // no listener that would be an uncaught exception. The real outcome is
    // already reported through the "error"/"close" handlers above.
    child.stdin.on("error", () => undefined);
    if (input) {
      child.stdin.write(input);
    }
    child.stdin.end();
  });
}
/**
 * Tries to convert an RTF file to plain text with the `pandoc` CLI
 * (`pandoc --from rtf --to plain <rtfPath>`).
 *
 * Best-effort: if `pandoc` cannot be started or exits unsuccessfully, the
 * error is swallowed and `null` is returned so the caller can try another
 * strategy.
 *
 * @param rtfPath - Path of the `.rtf` file on disk.
 * @returns The command's stdout, or `null` on failure or whitespace-only output.
 */
async function convertWithPandoc(rtfPath: string): Promise<string | null> {
  try {
    const result = await runCommand("pandoc", ["--from", "rtf", "--to", "plain", rtfPath]);
    return result.trim() ? result : null;
  } catch {
    // Best-effort strategy: pandoc missing or failing is not an error here.
    return null;
  }
}
/**
 * Tries to convert an RTF file to plain text with the `textutil` CLI
 * (`textutil -convert txt -stdout <rtfPath>`), a tool typically only present
 * on macOS.
 *
 * Best-effort: if `textutil` cannot be started or exits unsuccessfully, the
 * error is swallowed and `null` is returned so the caller can try another
 * strategy.
 *
 * @param rtfPath - Path of the `.rtf` file on disk.
 * @returns The command's stdout, or `null` on failure or whitespace-only output.
 */
async function convertWithTextutil(rtfPath: string): Promise<string | null> {
  try {
    const result = await runCommand("textutil", ["-convert", "txt", "-stdout", rtfPath]);
    return result.trim() ? result : null;
  } catch {
    // Best-effort strategy: textutil missing or failing is not an error here.
    return null;
  }
}
/**
 * Converts an RTF file to plain text, trying progressively cruder strategies.
 *
 * Order of attempts: the optional `rtf2text` Node package, the `pandoc` CLI,
 * the `textutil` CLI, and finally the built-in regex-based fallback. The first
 * strategy that yields a non-empty result wins. The fallback always returns a
 * string, which may be empty.
 *
 * @param rtfPath - Path of the `.rtf` file on disk.
 * @returns The extracted plain text.
 * @throws If the file cannot be read.
 */
async function convertRtfToText(rtfPath: string): Promise<string> {
  // The file is decoded as UTF-8 once and shared by the in-process strategies.
  const rtf = await fs.readFile(rtfPath, "utf-8");

  const nodeResult = await convertWithNode(rtf);
  if (nodeResult) {
    return nodeResult;
  }

  // The CLI tools read the file themselves, so they receive the path.
  const pandocResult = await convertWithPandoc(rtfPath);
  if (pandocResult) {
    return pandocResult;
  }

  const textutilResult = await convertWithTextutil(rtfPath);
  if (textutilResult) {
    return textutilResult;
  }

  return convertRtfFallback(rtf);
}
/**
 * Reads a zipped RTFD bundle and returns the text of its primary `.rtf` file.
 *
 * The archive is extracted into a fresh temporary directory (prefix
 * `soustack-rtfd-` under the OS temp dir). On success that directory is left
 * in place, because the returned `assets` entry and `meta.extractedPath` point
 * into it. NOTE(review): the caller is presumably responsible for deleting it
 * afterwards; confirm against the pipeline. On failure the directory is
 * removed before the error is rethrown.
 *
 * @param filePath - Path of the zip archive.
 * @returns A `text` adapter output with the converted text, the path of the
 *   chosen `.rtf` file, and the source/extraction paths as metadata.
 * @throws If an entry would be written outside the extraction directory, if
 *   the archive contains no `.rtf` file, or on any I/O failure.
 */
export async function readRtfdZip(filePath: string): Promise<AdapterOutput> {
  const extractedPath = await fs.mkdtemp(path.join(os.tmpdir(), "soustack-rtfd-"));
  try {
    const zip = new ZipArchive(filePath);
    for (const entry of zip.getEntries()) {
      const entryName = entry.entryName;
      const resolvedPath = path.resolve(extractedPath, entryName);
      // Zip-slip guard: reject entries (e.g. "../x" or absolute paths) that
      // would resolve outside the extraction directory.
      if (resolvedPath !== extractedPath && !resolvedPath.startsWith(`${extractedPath}${path.sep}`)) {
        throw new Error(`Blocked zip entry with invalid path: ${entryName}`);
      }
      if (entry.isDirectory) {
        await fs.mkdir(resolvedPath, { recursive: true });
        continue;
      }
      // Archives need not list parent directories before the files inside them.
      await fs.mkdir(path.dirname(resolvedPath), { recursive: true });
      const data = entry.getData();
      await fs.writeFile(resolvedPath, data);
    }

    const primaryRtf = await findPrimaryRtf(extractedPath);
    if (!primaryRtf) {
      throw new Error(`No .rtf files found in ${filePath}`);
    }
    const text = await convertRtfToText(primaryRtf.filePath);
    return {
      kind: "text",
      text,
      assets: [primaryRtf.filePath],
      meta: {
        sourcePath: filePath,
        extractedPath,
      },
    };
  } catch (error) {
    // Nobody learns the temp directory's path when we fail, so remove it here.
    // A cleanup failure must not mask the original error.
    await fs.rm(extractedPath, { recursive: true, force: true }).catch(() => undefined);
    throw error;
  }
}
/**
 * Reads an unpacked RTFD bundle directory and returns the text of its primary
 * `.rtf` file.
 *
 * @param dirPath - Path of the bundle directory; it is searched recursively.
 * @returns A `text` adapter output with the converted text, the path of the
 *   chosen `.rtf` file, and `dirPath` as `meta.sourcePath`.
 * @throws If the directory contains no `.rtf` file or cannot be read.
 */
export async function readRtfdDirectory(dirPath: string): Promise<AdapterOutput> {
  const primaryRtf = await findPrimaryRtf(dirPath);
  if (!primaryRtf) {
    throw new Error(`No .rtf files found in ${dirPath}`);
  }
  const text = await convertRtfToText(primaryRtf.filePath);
  return {
    kind: "text",
    text,
    assets: [primaryRtf.filePath],
    meta: {
      sourcePath: dirPath,
    },
  };
}