import * as fs from "node:fs";
import * as fsp from "node:fs/promises";
import * as path from "node:path";
import { isEnoent, logger } from "@oh-my-pi/pi-utils";

/** Prefix marking a string as a reference to a blob identified by its SHA-256 hex digest. */
const BLOB_PREFIX = "blob:sha256:";

/** Description of a stored blob, as returned by {@link BlobStore.put} and {@link BlobStore.putSync}. */
export interface BlobPutResult {
	/** SHA-256 hex digest of the stored bytes. */
	hash: string;
	/** Path of the blob file (`<dir>/<hash>`). */
	path: string;
	/** Blob reference string: `blob:sha256:<hash>`. */
	get ref(): string;
}

/**
 * Hash `data` and describe where the blob lives inside `dir`.
 * Performs no I/O; shared by the async and sync write paths so they cannot drift apart.
 */
function describeBlob(dir: string, data: Buffer): BlobPutResult {
	const hash = new Bun.SHA256().update(data).digest("hex");
	return {
		hash,
		path: path.join(dir, hash),
		get ref() {
			return `${BLOB_PREFIX}${hash}`;
		},
	};
}

/**
 * Content-addressed blob store for externalizing large binary data (images) from session JSONL files.
 *
 * Files are stored at `<dir>/<sha256-hex>` with no extension. The hash is computed over exactly the
 * bytes handed to `put`/`putSync`. Content-addressing makes writes idempotent and provides
 * automatic deduplication across sessions.
 */
export class BlobStore {
	constructor(readonly dir: string) {}

	/**
	 * Write binary data to the blob store.
	 *
	 * NOTE(review): unlike {@link putSync}, this does not create `dir` explicitly; it relies on
	 * `Bun.write` creating missing parent directories — confirm against the Bun version in use.
	 *
	 * @param data Raw bytes to store.
	 * @returns The blob's hash, file path and `blob:sha256:` reference.
	 */
	async put(data: Buffer): Promise<BlobPutResult> {
		const result = describeBlob(this.dir, data);
		await Bun.write(result.path, data);
		return result;
	}

	/**
	 * Synchronous variant of {@link put}. Use on persistence hot paths where the caller
	 * cannot afford the microtask hops of the async version (e.g. OOM-safe session writes).
	 * Creates the store directory if needed and returns once `fs.writeFileSync` has completed;
	 * no explicit fsync is issued.
	 *
	 * @param data Raw bytes to store.
	 * @returns The blob's hash, file path and `blob:sha256:` reference.
	 */
	putSync(data: Buffer): BlobPutResult {
		const result = describeBlob(this.dir, data);
		fs.mkdirSync(this.dir, { recursive: true });
		fs.writeFileSync(result.path, data);
		return result;
	}

	/**
	 * Read a blob by hash.
	 *
	 * NOTE(review): `hash` is joined into the file path without validation; callers should only
	 * pass hashes produced by `put`/`putSync` or extracted from trusted blob references.
	 *
	 * @param hash SHA-256 hex digest identifying the blob.
	 * @returns The blob's bytes, or `null` when the read fails with ENOENT.
	 * @throws Any other read error is rethrown unchanged.
	 */
	async get(hash: string): Promise<Buffer | null> {
		const blobPath = path.join(this.dir, hash);
		try {
			const contents = await Bun.file(blobPath).arrayBuffer();
			return Buffer.from(contents);
		} catch (err) {
			if (isEnoent(err)) return null;
			throw err;
		}
	}

	/**
	 * Check if a blob exists.
	 *
	 * @param hash SHA-256 hex digest identifying the blob.
	 * @returns `true` when the blob file is accessible; `false` on any access error
	 * (not only "file not found").
	 */
	async has(hash: string): Promise<boolean> {
		try {
			await fsp.access(path.join(this.dir, hash));
			return true;
		} catch {
			return false;
		}
	}
}

/** Check if a data string is a blob reference (starts with `blob:sha256:`). */
export function isBlobRef(data: string): boolean {
	return data.startsWith(BLOB_PREFIX);
}

/**
 * Extract the hash portion from a blob reference string.
 *
 * @returns Everything after the `blob:sha256:` prefix, or `null` when `data` is not a blob
 * reference. The extracted text is not checked for being a well-formed digest.
 */
export function parseBlobRef(data: string): string | null {
	if (!data.startsWith(BLOB_PREFIX)) return null;
	return data.slice(BLOB_PREFIX.length);
}

/** Identify provider transport image data URLs so persistence can externalize and restore them losslessly. */
export function isImageDataUrl(data: string): boolean {
	return data.startsWith("data:image/") && data.includes(";base64,");
}

/**
 * Externalize a provider image data URL to the blob store, returning a blob reference.
 * The full data URL string is stored (as UTF-8 bytes) so transport-native history can be
 * reconstructed on resume. If the input is already a blob reference, it is returned unchanged.
 */
export async function externalizeImageDataUrl(blobStore: BlobStore, dataUrl: string): Promise<string> {
	if (isBlobRef(dataUrl)) return dataUrl;
	const { ref } = await blobStore.put(Buffer.from(dataUrl, "utf8"));
	return ref;
}

/** Synchronous variant of {@link externalizeImageDataUrl}. */
export function externalizeImageDataUrlSync(blobStore: BlobStore, dataUrl: string): string {
	if (isBlobRef(dataUrl)) return dataUrl;
	return blobStore.putSync(Buffer.from(dataUrl, "utf8")).ref;
}

/**
 * Externalize an image's base64 data to the blob store, returning a blob reference.
 * The base64 text is decoded first, so the stored blob holds the raw binary bytes.
 * If the data is already a blob reference, returns it unchanged.
 */
export async function externalizeImageData(blobStore: BlobStore, base64Data: string): Promise<string> {
	if (isBlobRef(base64Data)) return base64Data;
	const { ref } = await blobStore.put(Buffer.from(base64Data, "base64"));
	return ref;
}

/** Synchronous variant of {@link externalizeImageData}. */
export function externalizeImageDataSync(blobStore: BlobStore, base64Data: string): string {
	if (isBlobRef(base64Data)) return base64Data;
	return blobStore.putSync(Buffer.from(base64Data, "base64")).ref;
}

/**
 * Resolve an externalized provider image data URL back to its original string.
 * If the data is not a blob reference, returns it unchanged.
 * If the blob is missing, logs a warning and returns the reference as-is.
 */
export async function resolveImageDataUrl(blobStore: BlobStore, data: string): Promise<string> {
	const hash = parseBlobRef(data);
	if (!hash) return data;

	const buffer = await blobStore.get(hash);
	if (!buffer) {
		logger.warn("Blob not found for persisted image data URL", { hash });
		return data;
	}
	return buffer.toString("utf8");
}

/**
 * Resolve a blob reference back to base64 data.
 * If the data is not a blob reference, returns it unchanged.
 * If the blob is missing, logs a warning and returns the reference as-is.
 */
export async function resolveImageData(blobStore: BlobStore, data: string): Promise<string> {
	const hash = parseBlobRef(data);
	if (!hash) return data;

	const buffer = await blobStore.get(hash);
	if (!buffer) {
		logger.warn("Blob not found for image reference", { hash });
		// Return the ref as-is; downstream will see invalid base64 but won't crash.
		return data;
	}
	return buffer.toString("base64");
}