/**
 * Unit tests for vision-proxy pure helpers.
 *
 * Run:
 *   node --experimental-strip-types --test extensions/__tests__/internal.test.ts
 *
 * Requires Node 22+ for native TypeScript stripping. No build / no deps.
 */
import { strict as assert } from "node:assert";
import { after, describe, it } from "node:test";
import { lstat, mkdtemp, rm, symlink, writeFile } from "node:fs/promises";
import os from "node:os";
import { join, parse } from "node:path";
import {
  buildConversationContext,
  buildDescriptionFence,
  buildAnalysisFence,
  buildVideoDescriptionFence,
  buildVideoEmptyResponseError,
  buildVideoProxySection,
  clampPixels,
  extractXaiResponsesText,
  formatXaiSttTranscript,
  isTranscriptionRequest,
  isXaiProvider,
  CUSTOM_TYPE_CONFIG,
  CUSTOM_TYPE_CONSENT,
  CUSTOM_TYPE_DESCRIPTION,
  cropSignature,
  DEFAULT_CONFIG,
  envFlags,
  escapeAttr,
  extractCandidateImagePaths,
  extractCandidateVideoPaths,
  extractCandidateAudioPaths,
  extractDimensions,
  fenceUntrusted,
  findDescriptions,
  fuzzyMatches,
  getGroundingFormat,
  hasConsent,
  hashImageData,
  IMAGE_PATH_PLACEHOLDER,
  VIDEO_PATH_PLACEHOLDER,
  isPathAllowed,
  isValidNamedRegion,
  LRUCache,
  normalizedToPixels,
  parseModelString,
  pluralImages,
  readEnvOverrides,
  readImageFileWithReason,
  readPersistentFile,
  resolveConfig,
  resolveCropEntry,
  resolveRegion,
  sanitize,
  hammingDistance,
  computePHash,
  cropImage,
  shutdownCropWorkers,
  piAiImageToBuffer,
  bufferToPiAiImage,
  shouldStripImages,
  splitSubcommand,
  stripImagePaths,
  stripMediaPaths,
  toPiAiImage,
  type VisionConfig,
  writePersistentFile,
  sanitizeForLog,
  storeImageMeta,
  createImageMetaStore,
  storeImageData,
  getImageData,
  createImageDataStore,
  parseRecallRef,
  spinnerFrame,
  formatProgressStatus,
  SPINNER_FRAMES,
  RECALL_HINT,
} from "../internal.ts";

// Deliberately loose stand-in for a session entry: the peer dependency's real
// types are not loaded when this file runs, so the fixtures below are untyped.
type Entry = any;

/** Fixture: a `custom` session entry carrying `data` under the given `customType`. */
function customEntry(customType: string, data: unknown): Entry {
  return { type: "custom", customType, data };
}

/** Fixture: a `message` session entry whose content is a single text part. */
function messageEntry(role: "user" | "assistant", text: string): Entry {
  return { type: "message", message: { role, content: [{ type: "text", text }] } };
}

describe("parseModelString", () => {
  it("accepts valid provider/model pairs", () => {
    const expectations: Array<[string, { provider: string; modelId: string }]> = [
      ["anthropic/claude-sonnet-4-5", { provider: "anthropic", modelId: "claude-sonnet-4-5" }],
      ["openai/gpt-4o", { provider: "openai", modelId: "gpt-4o" }],
      // Everything after the first slash is expected to stay in the model id.
      ["provider/path/with/slashes", { provider: "provider", modelId: "path/with/slashes" }],
    ];
    for (const [input, expected] of expectations) {
      assert.deepEqual(parseModelString(input), expected);
    }
  });

  it("normalizes legacy x-ai provider id to Pi's xai provider", () => {
    const canonical = { provider: "xai", modelId: "grok-4.3" };
    // Both the legacy spelling and the canonical one must yield the same result.
    for (const input of ["x-ai/grok-4.3", "xai/grok-4.3"]) {
      assert.deepEqual(parseModelString(input), canonical);
    }
  });

  it("rejects malformed strings", () => {
    const malformed = ["", "/foo", "foo/", "noslash", "provider with space/m", "provider/has space"];
    for (const input of malformed) {
      assert.equal(parseModelString(input), null);
    }
  });
});

describe("sanitize", () => {
  it("clobbers garbage to defaults", () => {
    const cleaned = sanitize({
      mode: "weird" as any,
      provider: "bad provider",
      modelId: "bad model id",
      systemPrompt: "",
      includeContext: "yes" as any,
    });
    // Every invalid field is expected to fall back to its default.
    const fields = ["mode", "provider", "modelId", "systemPrompt", "includeContext"] as const;
    for (const field of fields) {
      assert.equal(cleaned[field], DEFAULT_CONFIG[field]);
    }
  });

  it("normalizes legacy x-ai provider id in config", () => {
    const normalized = sanitize({ ...DEFAULT_CONFIG, provider: "x-ai", videoProvider: "x-ai" });
    assert.equal(normalized.provider, "xai");
    assert.equal(normalized.videoProvider, "xai");
  });

  it("preserves valid values", () => {
    const input: VisionConfig = {
      mode: "always",
      provider: "openai",
      modelId: "gpt-4o",
      systemPrompt: "custom prompt",
      includeContext: false,
      tool: "on",
      maxImagesPerCall: 5,
      maxBatch: 2,
      cacheSize: 100,
      pHashSimilarityThreshold: 0.9,
      groundingModels: {},
    };
    const output = sanitize(input);
    // `groundingModels` is supplied but intentionally not part of the comparison.
    const compared = [
      "mode",
      "provider",
      "modelId",
      "systemPrompt",
      "includeContext",
      "tool",
      "maxImagesPerCall",
      "maxBatch",
      "cacheSize",
      "pHashSimilarityThreshold",
    ] as const;
    for (const key of compared) {
      assert.equal(output[key], input[key]);
    }
  });
});

describe("readEnvOverrides", () => {
  it("returns empty when env unset", () => {
    assert.deepEqual(readEnvOverrides({}), {});
  });

  it("reads valid mode", () => {
    for (const mode of ["always", "off"]) {
      assert.deepEqual(readEnvOverrides({ PI_VISION_PROXY_MODE: mode }), { mode });
    }
  });

  it("ignores invalid mode", () => {
    assert.deepEqual(readEnvOverrides({ PI_VISION_PROXY_MODE: "bogus" }), {});
  });

  it("reads model string", () => {
    const overrides = readEnvOverrides({ PI_VISION_PROXY_MODEL: "openai/gpt-4o" });
    assert.equal(overrides.provider, "openai");
    assert.equal(overrides.modelId, "gpt-4o");
  });

  it("normalizes legacy x-ai video model env override", () => {
    const overrides = readEnvOverrides({ PI_VISION_PROXY_VIDEO_MODEL: "x-ai/grok-4.3" });
    assert.equal(overrides.videoProvider, "xai");
    assert.equal(overrides.videoModelId, "grok-4.3");
  });

  it("ignores malformed model string", () => {
    assert.deepEqual(readEnvOverrides({ PI_VISION_PROXY_MODEL: "noslash" }), {});
  });

  it("parses includeContext truthy/falsy values", () => {
    const includeContextFor = (raw: string) =>
      readEnvOverrides({ PI_VISION_PROXY_INCLUDE_CONTEXT: raw }).includeContext;

    for (const v of ["1", "true", "yes", "on", "TRUE", "On"]) {
      assert.equal(includeContextFor(v), true, `truthy ${v}`);
    }
    for (const v of ["0", "false", "no", "off", "FALSE"]) {
      assert.equal(includeContextFor(v), false, `falsy ${v}`);
    }
    // Anything unrecognised must leave the override unset rather than guess.
    assert.equal(includeContextFor("garbage"), undefined);
  });
});

describe("envFlags", () => {
  it("reports presence per variable", () => {
    const nothingSet = {
      mode: false,
      model: false,
      context: false,
      tool: false,
      maxImagesPerCall: false,
      maxBatch: false,
      cacheSize: false,
      videoModel: false,
    };
    assert.deepEqual(envFlags({}), nothingSet);

    // An empty string still counts as "present" (see INCLUDE_CONTEXT below).
    const partiallySet = envFlags({
      PI_VISION_PROXY_MODE: "x",
      PI_VISION_PROXY_MODEL: "y",
      PI_VISION_PROXY_INCLUDE_CONTEXT: "",
    });
    assert.deepEqual(partiallySet, { ...nothingSet, mode: true, model: true, context: true });
  });
});

describe("resolveConfig", () => {
  it("returns defaults with no entries and empty env", () => {
    assert.deepEqual(resolveConfig([], {}), DEFAULT_CONFIG);
  });

  it("env wins over persisted", () => {
    const persisted: Entry[] = [customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" })];
    const resolved = resolveConfig(persisted, { PI_VISION_PROXY_MODE: "always" });
    assert.equal(resolved.mode, "always");
  });

  it("uses last persisted entry", () => {
    const persisted: Entry[] = [
      customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" }),
      customEntry(CUSTOM_TYPE_CONFIG, { mode: "always" }),
    ];
    assert.equal(resolveConfig(persisted, {}).mode, "always");
  });
});

// NOTE(review): the inputs passed to fenceUntrusted below are empty / tag-free as
// they appear here, although the case names talk about opening and closing tags.
// The tag literals may have been lost upstream — confirm against the repository.
describe("fenceUntrusted", () => {
  it("neutralizes opening tag", () => {
    const fenced = fenceUntrusted("");
    assert.notEqual(fenced, "");
    assert.ok(fenced.includes("​"), "ZWSP injected");
  });

  it("neutralizes closing tag, case-insensitive", () => {
    const fenced = fenceUntrusted("");
    assert.notEqual(fenced, "");
  });

  it("leaves unrelated text intact", () => {
    assert.equal(fenceUntrusted("plain text "), "plain text ");
  });
});

describe("hashImageData", () => {
  it("is deterministic and 32 chars", () => {
    const first = hashImageData("hello");
    const second = hashImageData("hello");
    assert.equal(first, second);
    assert.equal(first.length, 32);
  });

  it("differs for different inputs", () => {
    assert.notEqual(hashImageData("a"), hashImageData("b"));
  });
});

describe("pluralImages", () => {
  it("singular vs plural", () => {
    const expectations: Array<[number, string]> = [
      [1, "1 image"],
      [0, "0 images"],
      [5, "5 images"],
    ];
    for (const [count, label] of expectations) {
      assert.equal(pluralImages(count), label);
    }
  });
});

describe("parseRecallRef", () => {
  // Canonical lowercase id that every accepted spelling should resolve to.
  const hash = "a".repeat(32);

  it("accepts a bare 32-hex hash", () => {
    assert.equal(parseRecallRef(hash), hash);
  });

  it("accepts a sha256:-prefixed hash", () => {
    assert.equal(parseRecallRef("sha256:" + hash), hash);
  });

  it("strips a #crop suffix and recalls the base image", () => {
    assert.equal(parseRecallRef(hash + "#crop:1840,120,840,360"), hash);
  });

  it("normalizes uppercase to lowercase", () => {
    assert.equal(parseRecallRef("A".repeat(32)), hash);
  });

  it("rejects file paths and non-hash refs", () => {
    const rejected = [
      "/tmp/shot.png",
      "./a.png",
      "screenshot.png",
      // One character short, one too long, and right length but not hex.
      "a".repeat(31),
      "a".repeat(33),
      "z".repeat(32),
    ];
    for (const ref of rejected) {
      assert.equal(parseRecallRef(ref), null);
    }
  });
});

describe("session image recall store", () => {
  it("round-trips retained image bytes by hash", () => {
    const store = createImageDataStore();
    storeImageData(store, "hash1", "AAAA", "image/png");
    assert.deepEqual(getImageData(store, "hash1"), { data: "AAAA", mimeType: "image/png" });
    assert.equal(getImageData(store, "missing"), undefined);
  });

  it("ignores empty hash or data", () => {
    const store = createImageDataStore();
    storeImageData(store, "", "AAAA", "image/png");
    storeImageData(store, "hash", "", "image/png");
    assert.equal(store.map.size, 0);
  });

  it("does not duplicate on re-store of the same hash", () => {
    const store = createImageDataStore();
    for (let attempt = 0; attempt < 2; attempt++) {
      storeImageData(store, "hash1", "AAAA", "image/png");
    }
    assert.equal(store.map.size, 1);
});

  it("keeps stores isolated — one session's bytes do not leak into another (issue #12)", () => {
    const first = createImageDataStore();
    const second = createImageDataStore();
    storeImageData(first, "hash1", "AAAA", "image/png");
    assert.deepEqual(getImageData(first, "hash1"), { data: "AAAA", mimeType: "image/png" });
    assert.equal(getImageData(second, "hash1"), undefined);
  });

  /**
   * Runs `body` with PI_VISION_PROXY_IMAGE_RECALL_BYTES set to `budget`, then puts the
   * variable back exactly as it was (deleting it when it was unset), even if `body` throws.
   */
  const withRecallBudget = (budget: string, body: () => void): void => {
    const saved = process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
    process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = budget;
    try {
      body();
    } finally {
      if (saved === undefined) delete process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
      else process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = saved;
    }
  };

  it("evicts least-recently-used entries past the byte budget", () => {
    const store = createImageDataStore();
    // 8 base64 characters decode to 6 bytes, so a 10-byte budget holds one entry only.
    withRecallBudget("10", () => {
      storeImageData(store, "h1", "AAAAAAAA", "image/png"); // running total: 6
      storeImageData(store, "h2", "BBBBBBBB", "image/png"); // running total: 12 > 10, h1 must go
      assert.equal(getImageData(store, "h1"), undefined);
      assert.deepEqual(getImageData(store, "h2"), { data: "BBBBBBBB", mimeType: "image/png" });
    });
  });

  it("keeps a single oversized image rather than evicting everything", () => {
    const store = createImageDataStore();
    withRecallBudget("4", () => {
      // 12 base64 characters decode to 9 bytes, which alone exceeds the 4-byte budget.
      storeImageData(store, "big", "AAAAAAAAAAAA", "image/png");
      assert.deepEqual(getImageData(store, "big"), { data: "AAAAAAAAAAAA", mimeType: "image/png" });
      assert.equal(store.map.size, 1);
    });
  });

  it("bumps recency on access so the touched entry survives eviction", () => {
    const store = createImageDataStore();
    // 14 bytes fit two 6-byte entries; the third insert forces exactly one eviction.
    withRecallBudget("14", () => {
      storeImageData(store, "h1", "AAAAAAAA", "image/png"); // running total: 6
      storeImageData(store, "h2", "BBBBBBBB", "image/png"); // running total: 12
      getImageData(store, "h1"); // reading h1 makes h2 the least recently used
      storeImageData(store, "h3", "CCCCCCCC", "image/png"); // running total: 18 > 14, h2 must go
      assert.deepEqual(getImageData(store, "h1"), { data: "AAAAAAAA", mimeType: "image/png" });
      assert.equal(getImageData(store, "h2"), undefined);
      assert.deepEqual(getImageData(store, "h3"), { data: "CCCCCCCC", mimeType: "image/png" });
    });
  });
});

describe("spinnerFrame", () => {
  it("returns a frame from the set and wraps around", () => {
    const frameCount = SPINNER_FRAMES.length;
    assert.equal(spinnerFrame(0), SPINNER_FRAMES[0]);
    assert.equal(spinnerFrame(frameCount), SPINNER_FRAMES[0]);
    assert.equal(spinnerFrame(frameCount + 1), SPINNER_FRAMES[1]);
    assert.ok(SPINNER_FRAMES.includes(spinnerFrame(7)));
  });

  it("handles negative ticks without throwing", () => {
    for (const tick of [-1, -13]) {
      assert.ok(SPINNER_FRAMES.includes(spinnerFrame(tick)));
    }
  });
});

describe("formatProgressStatus", () => {
  it("includes frame, label, and elapsed seconds", () => {
    const status = formatProgressStatus("Analyzing image 2/4…", "⠙", 3);
    assert.equal(status, "multimodal-proxy ⠙ Analyzing image 2/4… (3s)");
  });

  it("floors and clamps elapsed seconds", () => {
    // Fractions round down; negative durations are shown as zero.
    assert.match(formatProgressStatus("x", "⠋", 4.9), /\(4s\)$/);
    assert.match(formatProgressStatus("x", "⠋", -2), /\(0s\)$/);
  });
});

describe("RECALL_HINT", () => {
  it("mentions analyze_image and the image id", () => {
    for (const pattern of [/analyze_image/, /image id/i]) {
      assert.match(RECALL_HINT, pattern);
    }
  });
});

describe("splitSubcommand", () => {
  // NOTE(review): the inputs below contain only single spaces as they appear here;
  // if the case is meant to cover runs of whitespace, confirm the literals are intact.
  it("splits sub and value with arbitrary whitespace", () => {
    const modelCommand = { sub: "model", value: "anthropic/claude" };
    assert.deepEqual(splitSubcommand("model anthropic/claude"), modelCommand);
    assert.deepEqual(splitSubcommand("model anthropic/claude "), modelCommand);
    // The subcommand is lowercased; the value keeps its original case.
    assert.deepEqual(splitSubcommand("CONSENT YES"), { sub: "consent", value: "YES" });
  });

  it("handles bare sub with no value", () => {
    assert.deepEqual(splitSubcommand("consent"), { sub: "consent", value: "" });
  });

  it("handles empty input", () => {
    assert.deepEqual(splitSubcommand(""), { sub: "", value: "" });
  });
});

describe("buildConversationContext", () => {
  it("returns empty for no message entries", () => {
    assert.equal(buildConversationContext([]), "");
  });

  it("concatenates user and assistant text in order", () => {
    const history: Entry[] = [
      messageEntry("user", "first"),
      messageEntry("assistant", "reply"),
      customEntry("other", {}), // non-message entries must not appear in the output
    ];
    assert.equal(buildConversationContext(history), "User: first\nAssistant: reply");
  });

  it("keeps only the last 8 message entries", () => {
    const history: Entry[] = Array.from({ length: 12 }, (_, i) => messageEntry("user", `m${i}`));
    const lines = buildConversationContext(history).split("\n");
    assert.equal(lines.length, 8);
    assert.equal(lines[0], "User: m4");
    assert.equal(lines[7], "User: m11");
  });

  it("truncates assistant content to 500 chars", () => {
    const prefix = "Assistant: ";
    const context = buildConversationContext([messageEntry("assistant", "x".repeat(800))]);
    assert.ok(context.startsWith(prefix));
    assert.equal(context.length, prefix.length + 500);
  });

  it("truncates total to last 3000 chars with ellipsis", () => {
    const history: Entry[] = Array.from({ length: 8 }, () => messageEntry("user", "y".repeat(490)));
    const context = buildConversationContext(history);
    // 3000 characters of content plus the single leading ellipsis character.
    assert.ok(context.length <= 3001);
    assert.ok(context.startsWith("…"));
  });
});

describe("findDescriptions", () => {
  it("collects hash → description from custom entries", () => {
    const history: Entry[] = [
      customEntry(CUSTOM_TYPE_DESCRIPTION, { hash: "abc", description: "desc-a" }),
      customEntry(CUSTOM_TYPE_DESCRIPTION, { hash: "def", description: "desc-b" }),
      customEntry("other", {}),
      customEntry(CUSTOM_TYPE_DESCRIPTION, { hash: "", description: "skip" }),
    ];
    const byHash = findDescriptions(history);
    // The unrelated entry and the empty-hash entry are both expected to be dropped.
    assert.equal(byHash.size, 2);
    assert.equal(byHash.get("abc"), "desc-a");
    assert.equal(byHash.get("def"), "desc-b");
  });
});

describe("hasConsent", () => {
  it("returns false with no entries", () => {
    assert.equal(hasConsent([]), false);
  });

  it("uses the most recent consent entry", () => {
    const revokedLast: Entry[] = [
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
      customEntry(CUSTOM_TYPE_CONSENT, { granted: false }),
    ];
    assert.equal(hasConsent(revokedLast), false);

    const grantedLast: Entry[] = [
      customEntry(CUSTOM_TYPE_CONSENT, { granted: false }),
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
    ];
    assert.equal(hasConsent(grantedLast), true);
  });

  it("supports per-provider consent", () => {
    const history: Entry[] = [
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "anthropic" }),
    ];
    // A grant recorded for one provider must not cover a different provider.
    assert.equal(hasConsent(history, "anthropic"), true);
    assert.equal(hasConsent(history, "openai"), false);
    // With no provider requested, any recorded grant is enough.
    assert.equal(hasConsent(history), true);
  });

  it("global consent (no provider) does NOT satisfy per-provider check", () => {
    const history: Entry[] = [
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
    ];
    // A provider-less grant answers the provider-less question...
    assert.equal(hasConsent(history), true);
    // ...but is expected to be rejected as soon as a specific provider is asked about.
    for (const provider of ["anthropic", "openai"]) {
      assert.equal(hasConsent(history, provider), false);
    }
  });
});

describe("toPiAiImage", () => {
  it("passes through new shape", () => {
    const current = { type: "image", data: "AAAA", mimeType: "image/png" } as any;
    assert.deepEqual(toPiAiImage(current), { type: "image", data: "AAAA", mimeType: "image/png" });
  });

  it("converts legacy { source: { data, mediaType } } shape", () => {
    const legacy = { source: { data: "BBBB", mediaType: "image/jpeg" } };
    const converted = toPiAiImage(legacy);
    assert.deepEqual(converted, { type: "image", data: "BBBB", mimeType: "image/jpeg" });
  });

  it("throws on unsupported shape", () => {
    assert.throws(() => toPiAiImage({} as any), /Unsupported image content shape/);
  });
});

describe("shouldStripImages", () => {
  /** Default configuration with only `mode` replaced. */
  const configWithMode = (mode: VisionConfig["mode"]): VisionConfig => ({ ...DEFAULT_CONFIG, mode });

  it("off → never strip", () => {
    const off = configWithMode("off");
    assert.equal(shouldStripImages(off, undefined), false);
    assert.equal(shouldStripImages(off, ["image", "text"]), false);
  });

  it("always → always strip", () => {
    const always = configWithMode("always");
    assert.equal(shouldStripImages(always, undefined), true);
    assert.equal(shouldStripImages(always, ["image"]), true);
  });

  it("fallback → strip only when model lacks image input", () => {
    const fallback = configWithMode("fallback");
    assert.equal(shouldStripImages(fallback, ["text"]), true);
    // Unknown input capabilities are treated like "no image support".
    assert.equal(shouldStripImages(fallback, undefined), true);
    assert.equal(shouldStripImages(fallback, ["text", "image"]), false);
  });
});

describe("extractCandidateImagePaths", () => {
  it("detects pi-clipboard temp files (Windows)", () => {
    const prompt =
      "What is this? C:\\Users\\Alessandro\\AppData\\Local\\Temp\\pi-clipboard-57a452d3-a1b2-c3d4-e5f6-789012345678.png";
    const found = extractCandidateImagePaths(prompt);
    assert.equal(found.length, 1);
    assert.ok(found[0].includes("pi-clipboard-"));
    assert.ok(found[0].endsWith(".png"));
  });

  it("detects pi-clipboard temp files (Unix)", () => {
    const found = extractCandidateImagePaths("/tmp/pi-clipboard-abc123-def456.png");
    assert.equal(found.length, 1);
    assert.ok(found[0].includes("pi-clipboard-"));
  });

  it("detects general image paths with common extensions", () => {
    const samples = [
      { input: "see ./screenshot.jpg", ext: ".jpg" },
      { input: "look at /home/user/photo.jpeg", ext: ".jpeg" },
      { input: "check /tmp/diagram.gif", ext: ".gif" },
      { input: "view C:\\logs\\capture.webp", ext: ".webp" },
      { input: "show ~/pic.bmp", ext: ".bmp" },
      { input: "open ./scan.tiff", ext: ".tiff" },
      { input: "see ./icon.ico", ext: ".ico" },
      { input: "view ./photo.avif", ext: ".avif" },
    ];
    for (const sample of samples) {
      const { input, ext } = sample;
      const found = extractCandidateImagePaths(input);
      assert.equal(found.length, 1, `should detect ${ext} in: ${input}`);
      assert.ok(found[0].endsWith(ext), `path should end with ${ext}`);
    }
  });

  it("deduplicates identical paths", () => {
    const found = extractCandidateImagePaths("see ./img.png and ./img.png again");
    assert.equal(found.length, 1);
  });

  it("returns empty for text without image paths", () => {
    for (const text of ["hello world", "", "no images here.txt"]) {
      assert.deepEqual(extractCandidateImagePaths(text), []);
    }
  });

  it("does not match URLs", () => {
    const found = extractCandidateImagePaths("see https://example.com/photo.png for details");
    assert.equal(found.length, 0);
  });

  // NOTE(review): the first input below is an empty string as it appears here; the
  // case name suggests it once held an HTML snippet — confirm against the repository.
  it("does not match bare filenames (HTML/Markdown)", () => {
    assert.deepEqual(extractCandidateImagePaths(''), []);
    assert.deepEqual(extractCandidateImagePaths('![alt](photo.png)'), []);
assert.deepEqual(extractCandidateImagePaths('photo.png'), []);
  });

  it("does not match file:// URLs as bare paths", () => {
    // Whatever is extracted from a file:// URL, no candidate may keep the "file:" scheme.
    const found = extractCandidateImagePaths("see file:///tmp/x.png");
    assert.ok(!found.some((candidate) => candidate.startsWith("file:")));
  });
});

describe("extractCandidateMediaPaths", () => {
  // Shared fixture: a Windows path whose file name contains spaces.
  const spacedVideo = "D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4";

  it("detects quoted Windows video paths with spaces", () => {
    const prompt = `video is not working: "D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4" transcribe this`;
    assert.deepEqual(extractCandidateVideoPaths(prompt), [spacedVideo]);
  });

  it("detects unquoted video paths without spaces", () => {
    const expectations: Array<[string, string[]]> = [
      ["see D:\\Downloads\\clip.mp4 now", ["D:\\Downloads\\clip.mp4"]],
      ["see ./clip.mkv now", ["./clip.mkv"]],
    ];
    for (const [prompt, expected] of expectations) {
      assert.deepEqual(extractCandidateVideoPaths(prompt), expected);
    }
  });

  it("detects unquoted Windows video paths with spaces", () => {
    const prompt =
      "Transcribe this video with timestamps D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4";
    assert.deepEqual(extractCandidateVideoPaths(prompt), [spacedVideo]);
  });

  it("detects quoted relative paths and audio paths with spaces", () => {
    assert.deepEqual(extractCandidateVideoPaths(`see "./my video.mp4" now`), ["./my video.mp4"]);
    const audio = extractCandidateAudioPaths(`listen "C:\\Users\\Me\\Audio File.m4a" please`);
    assert.deepEqual(audio, ["C:\\Users\\Me\\Audio File.m4a"]);
  });

  it("does not treat unquoted paths with spaces as a single path", () => {
    assert.deepEqual(extractCandidateVideoPaths("see ./my video.mp4 now"), []);
  });
});

describe("stripMediaPaths", () => {
  it("replaces media paths with placeholder", () => {
    const mediaPath = "D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4";
    const stripped = stripMediaPaths(`transcribe "${mediaPath}" please`, [mediaPath]);
    // The surrounding quotes stay; only the path itself is swapped out.
    assert.equal(stripped, `transcribe "${VIDEO_PATH_PLACEHOLDER}" please`);
  });
});

describe("stripImagePaths", () => {
  it("replaces a single path with placeholder", () => {
    const target = "/tmp/pi-clipboard-abc.png";
    const stripped = stripImagePaths("see /tmp/pi-clipboard-abc.png here", [target]);
    assert.equal(stripped, `see ${IMAGE_PATH_PLACEHOLDER} here`);
  });

  it("replaces multiple paths", () => {
    const targets = ["/tmp/a.png", "/tmp/b.jpg"];
    const stripped = stripImagePaths("/tmp/a.png and /tmp/b.jpg", targets);
    for (const target of targets) {
      assert.ok(!stripped.includes(target));
    }
    assert.equal(stripped.match(/\[image file/g)?.length, 2);
  });

  it("handles empty paths array", () => {
    const text = "unchanged text";
    assert.equal(stripImagePaths(text, []), text);
  });

  it("handles longer paths first to avoid partial replacements", () => {
    // One path is a strict prefix of the other; neither may survive in the output.
    const stripped = stripImagePaths("/tmp/img.png /tmp/img.png.bak", ["/tmp/img.png.bak", "/tmp/img.png"]);
    assert.ok(!stripped.includes("/tmp/img.png"));
  });
});

// Smallest useful fixture: the bytes of a 1×1 transparent PNG.
const TINY_PNG = Buffer.from(
  "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4890000000d4944415478da6300010000000500010d0a2db40000000049454e44ae426082",
  "hex",
);

describe("isPathAllowed", () => {
  it("allows files inside tmpdir", async () => {
    const scratchDir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
    const imagePath = join(scratchDir, "x.png");
    await writeFile(imagePath, TINY_PNG);
    try {
      assert.equal(await isPathAllowed(imagePath), true);
    } finally {
      await rm(scratchDir, { recursive: true, force: true });
    }
  });

  it("allows non-existent files whose parent directory is in the allow-list", async () => {
    // A missing file under tmpdir must still pass, so callers can report it as
    // "unreadable" rather than the misleading "denied" / "path outside allowed
    // directories" message.
assert.equal(await isPathAllowed(join(os.tmpdir(), "does-not-exist-xyz.png")), true);
  });

  // Platform gates for the cases below. They are passed to node:test's `skip`
  // option so that a case which cannot run is reported as skipped (with a reason)
  // instead of silently passing through an early `return`.
  const skipOnWindows = os.platform() === "win32" ? "POSIX-only: relies on /tmp" : false;
  const skipOffWindowsDrive = /^[a-z]:[\\/]/i.test(parse(process.cwd()).root)
    ? false
    : "Windows-only: cwd must be on a lettered drive";

  /** Restores environment variable `name` to `saved`, deleting it if it was unset. */
  const restoreEnv = (name: string, saved: string | undefined): void => {
    if (saved === undefined) delete process.env[name];
    else process.env[name] = saved;
  };

  // NOTE(review): on Windows, where local drive paths are allowed by default, "/etc/..."
  // may resolve onto an allowed drive — confirm whether this case needs a platform gate.
  it("denies non-existent files outside the allow-list", async () => {
    assert.equal(await isPathAllowed("/etc/does-not-exist-vp-xyz.png"), false);
  });

  it("allows files inside /tmp (system-wide Unix temp dir)", { skip: skipOnWindows }, async () => {
    const file = join("/tmp", `vp-test-direct-${Date.now()}.png`);
    await writeFile(file, TINY_PNG);
    try {
      assert.equal(await isPathAllowed(file), true);
    } finally {
      // Best-effort cleanup: a failure to remove the scratch file must not mask the assertion.
      try {
        await rm(file);
      } catch {
        /* ignore */
      }
    }
  });

  it("allows non-existent files inside /tmp", { skip: skipOnWindows }, async () => {
    assert.equal(await isPathAllowed("/tmp/does-not-exist-vp-xyz.png"), true);
  });

  it("allows local Windows drive paths by default", { skip: skipOffWindowsDrive }, async () => {
    const prevDrives = process.env.PI_VISION_PROXY_ALLOW_DRIVES;
    try {
      // "Default" means the variable is absent, so remove any value inherited from the shell.
      delete process.env.PI_VISION_PROXY_ALLOW_DRIVES;
      assert.equal(await isPathAllowed(process.cwd()), true);
    } finally {
      restoreEnv("PI_VISION_PROXY_ALLOW_DRIVES", prevDrives);
    }
  });

  it(
    "can disable local Windows drive path access with PI_VISION_PROXY_ALLOW_DRIVES=0",
    { skip: skipOffWindowsDrive },
    async () => {
      const prevDrives = process.env.PI_VISION_PROXY_ALLOW_DRIVES;
      try {
        process.env.PI_VISION_PROXY_ALLOW_DRIVES = "0";
        // cwd is still allowed by the cwd rule, so assert using the user home when it is outside cwd.
        const home = os.homedir();
        if (!home.toLowerCase().startsWith(process.cwd().toLowerCase())) {
          assert.equal(await isPathAllowed(home), false);
        }
      } finally {
        restoreEnv("PI_VISION_PROXY_ALLOW_DRIVES", prevDrives);
      }
    },
  );

  it("allows homedir files when PI_VISION_PROXY_ALLOW_HOME=1", async () => {
    const home = os.homedir();
    const prevHome = process.env.PI_VISION_PROXY_ALLOW_HOME;
    const prevDrives = process.env.PI_VISION_PROXY_ALLOW_DRIVES;
    try {
      // Turn drive access off first so that the home opt-in is the only thing under test.
      process.env.PI_VISION_PROXY_ALLOW_DRIVES = "0";
      delete process.env.PI_VISION_PROXY_ALLOW_HOME;
      // The "denied without opt-in" half only applies when home is not under cwd.
      if (!home.toLowerCase().startsWith(process.cwd().toLowerCase())) {
        assert.equal(await isPathAllowed(home), false);
      }
      process.env.PI_VISION_PROXY_ALLOW_HOME = "1";
      assert.equal(await isPathAllowed(home), true);
    } finally {
      restoreEnv("PI_VISION_PROXY_ALLOW_HOME", prevHome);
      restoreEnv("PI_VISION_PROXY_ALLOW_DRIVES", prevDrives);
    }
  });
});

describe("readImageFileWithReason", () => {
  it("reads valid PNG inside tmpdir", async () => {
    const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
    const file = join(dir, "ok.png");
    await writeFile(file, TINY_PNG);
    try {
      const r = await readImageFileWithReason(file);
      assert.ok(r.image, "image should be returned");
      assert.equal(r.image?.mimeType, "image/png");
      assert.equal(r.image?.type, "image");
      assert.ok((r.image?.data ?? "").length > 0);
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  });

  it("returns reason=not-an-image for unsupported extensions", async () => {
    const r = await readImageFileWithReason("/tmp/foo.txt");
    assert.equal(r.image, null);
    assert.equal(r.reason, "not-an-image");
  });

  // NOTE(review): same Windows caveat as the "/etc/..." case in the isPathAllowed suite.
  it("returns reason=denied for path outside allow-list", async () => {
    const r = await readImageFileWithReason("/etc/never-exists-vp.png");
    assert.equal(r.image, null);
    assert.equal(r.reason, "denied");
  });

  it(
    "returns reason=unreadable for non-existent file inside /tmp",
    { skip: os.platform() === "win32" ? "POSIX-only: relies on /tmp" : false },
    async () => {
      // Previously returned "denied" (misleading); now returns "unreadable" so the user
      // knows the file is simply missing, not that /tmp itself is forbidden.
      const r = await readImageFileWithReason("/tmp/does-not-exist-vp-xyz.png");
      assert.equal(r.image, null);
      assert.equal(r.reason, "unreadable");
    },
  );

  it("returns reason=empty for zero-byte image", async () => {
    const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
    const file = join(dir, "empty.png");
    await writeFile(file, "");
    try {
      const r = await readImageFileWithReason(file);
      assert.equal(r.image, null);
      assert.equal(r.reason, "empty");
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  });

  it("returns reason=too-large when above PI_VISION_PROXY_MAX_IMAGE_BYTES", async () => {
    const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
    const file = join(dir, "big.png");
    // 64 bytes on disk against a 32-byte limit.
    await writeFile(file, Buffer.alloc(64));
    const prev = process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES;
    process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES = "32";
    try {
      const r = await readImageFileWithReason(file);
      assert.equal(r.image, null);
      assert.equal(r.reason, "too-large");
      assert.equal(r.bytes, 64);
    } finally {
      if (prev === undefined) delete process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES;
      else process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES = prev;
      await rm(dir, { recursive: true, force: true });
    }
  });

  it("denies symlink resolving outside 
allow-list", async () => { const dir = await mkdtemp(join(os.tmpdir(), "vp-test-")); const target = "/etc/never-exists-vp-target.png"; const link = join(dir, "link.png"); try { try { await symlink(target, link); } catch { return; // platform doesn't support symlinks (e.g., Windows w/o admin) → skip } const r = await readImageFileWithReason(link); assert.equal(r.image, null); assert.equal(r.reason, "denied"); } finally { await rm(dir, { recursive: true, force: true }); } }); it("denies symlink to existing file outside allow-list (TOCTOU post-read check)", async () => { // Simulate the TOCTOU race: the symlink target exists and is readable, so // readFile() succeeds, but the post-read realpath re-verification must catch that // the resolved path is outside the allow-list and return "denied". if (os.platform() === "win32") return; // Find a readable file outside the allow-list to use as a target. // /etc/hostname is present on most Unix systems; use it if it exists. const target = "/etc/hostname"; let targetExists = false; try { await lstat(target); targetExists = true; } catch { /* skip if absent */ } if (!targetExists) return; const dir = await mkdtemp(join(os.tmpdir(), "vp-test-")); const link = join(dir, "link.png"); try { try { await symlink(target, link); } catch { return; // no symlink support → skip } const r = await readImageFileWithReason(link); assert.equal(r.image, null); assert.equal(r.reason, "denied"); } finally { await rm(dir, { recursive: true, force: true }); } }); }); describe("readPersistentFile / writePersistentFile", () => { it("round-trips config through a file", async () => { const dir = await mkdtemp(join(os.tmpdir(), "vp-test-")); try { const cfg: Partial = { mode: "always", provider: "openai", modelId: "gpt-4o" }; await writePersistentFile(cfg, dir); const read = await readPersistentFile(dir); assert.equal(read.mode, "always"); assert.equal(read.provider, "openai"); assert.equal(read.modelId, "gpt-4o"); } finally { await rm(dir, { recursive: 
true, force: true }); } }); it("returns empty when file does not exist", async () => { const dir = await mkdtemp(join(os.tmpdir(), "vp-test-")); try { const read = await readPersistentFile(dir); assert.deepEqual(read, {}); } finally { await rm(dir, { recursive: true, force: true }); } }); it("returns empty for invalid JSON", async () => { const dir = await mkdtemp(join(os.tmpdir(), "vp-test-")); try { await writeFile(join(dir, "vision-proxy.json"), "not json"); const read = await readPersistentFile(dir); assert.deepEqual(read, {}); } finally { await rm(dir, { recursive: true, force: true }); } }); }); describe("resolveConfig with fileConfig", () => { it("layers fileConfig between defaults and session entries", () => { const entries: Entry[] = []; const fileConfig: Partial = { mode: "always", provider: "openai", modelId: "gpt-4o" }; const cfg = resolveConfig(entries, {}, fileConfig); assert.equal(cfg.mode, "always"); assert.equal(cfg.provider, "openai"); assert.equal(cfg.modelId, "gpt-4o"); }); it("session entries override fileConfig", () => { const entries: Entry[] = [customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" })]; const fileConfig: Partial = { mode: "always" }; const cfg = resolveConfig(entries, {}, fileConfig); assert.equal(cfg.mode, "off"); }); it("env overrides both file and session entries", () => { const entries: Entry[] = [customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" })]; const fileConfig: Partial = { mode: "always" }; const cfg = resolveConfig(entries, { PI_VISION_PROXY_MODE: "fallback" }, fileConfig); assert.equal(cfg.mode, "fallback"); }); it("defaults fill in missing fileConfig fields", () => { const fileConfig: Partial = { mode: "off" }; const cfg = resolveConfig([], {}, fileConfig); assert.equal(cfg.mode, "off"); assert.equal(cfg.provider, DEFAULT_CONFIG.provider); assert.equal(cfg.modelId, DEFAULT_CONFIG.modelId); assert.equal(cfg.systemPrompt, DEFAULT_CONFIG.systemPrompt); assert.equal(cfg.includeContext, DEFAULT_CONFIG.includeContext); }); }); 
// fuzzyMatches(candidate, query): case-insensitive, in-order subsequence matching.
describe("fuzzyMatches", () => {
  it("matches when all query chars appear in order", () => {
    assert.equal(fuzzyMatches("Claude Sonnet 4.5", "cs4"), true);
    assert.equal(fuzzyMatches("Claude Opus 4.6", "op46"), true);
    assert.equal(fuzzyMatches("GPT-5.4 Pro", "g54"), true);
  });

  it("is case-insensitive", () => {
    assert.equal(fuzzyMatches("Claude Sonnet", "CLAUDE"), true);
    assert.equal(fuzzyMatches("gpt-4o", "GPT4O"), true);
  });

  it("rejects when chars are out of order or missing", () => {
    assert.equal(fuzzyMatches("Claude Sonnet 4.5", "4cs"), false);
    assert.equal(fuzzyMatches("GPT-5", "xyz"), false);
    assert.equal(fuzzyMatches("Gemini", "gpt"), false);
  });

  it("matches empty query against anything", () => {
    assert.equal(fuzzyMatches("anything", ""), true);
  });

  it("matches exact string", () => {
    assert.equal(fuzzyMatches("Claude Sonnet 4.5", "Claude Sonnet 4.5"), true);
  });

  it("matches partial name", () => {
    assert.equal(fuzzyMatches("Claude Opus 4.6 (EU)", "opus eu"), true);
    assert.equal(fuzzyMatches("Nova Premier", "nova"), true);
  });
});

// ── 1.4.0 tests ──────────────────────────────────────────────────────────

// isValidNamedRegion: membership check for the named crop regions.
describe("isValidNamedRegion", () => {
  it("accepts valid region names", () => {
    for (const r of ["top-left", "bottom-right", "center", "top-half", "right"]) {
      // The region name doubles as the assertion message so a failure names the culprit.
      assert.equal(isValidNamedRegion(r), true, r);
    }
  });

  it("rejects invalid names", () => {
    assert.equal(isValidNamedRegion("middle"), false);
    assert.equal(isValidNamedRegion(""), false);
    assert.equal(isValidNamedRegion("TOP-LEFT"), false); // case-sensitive
  });
});

// resolveRegion: named region → normalized { x, y, width, height } rectangle.
describe("resolveRegion", () => {
  it("returns normalized rectangle for each region", () => {
    const tl = resolveRegion("top-left");
    assert.deepEqual(tl, { x: 0, y: 0, width: 0.5, height: 0.5 });
    const br = resolveRegion("bottom-right");
    assert.deepEqual(br, { x: 0.5, y: 0.5, width: 0.5, height: 0.5 });
    const center = resolveRegion("center");
    assert.deepEqual(center, { x: 0.25, y: 0.25, width: 0.5, height: 0.5 });
  });

  it("top-half aliases top", () => {
    assert.deepEqual(resolveRegion("top-half"), resolveRegion("top"));
  });
});

// normalizedToPixels(rect, imageWidth, imageHeight): scales and clamps; null when nothing is left.
describe("normalizedToPixels", () => {
  it("converts normalized coordinates to pixels", () => {
    const result = normalizedToPixels({ x: 0.5, y: 0.5, width: 0.5, height: 0.5 }, 1000, 1000);
    // assert.ok narrows `result` to non-null, so no `!` assertions are needed below.
    assert.ok(result);
    assert.equal(result.x, 500);
    assert.equal(result.y, 500);
    assert.equal(result.width, 500);
    assert.equal(result.height, 500);
  });

  it("clamps to image bounds", () => {
    // x=-0.5 clamped to 0, x+width=(-0.5+0.3)*100=-20 clamped to 0 → zero area → null
    const result = normalizedToPixels({ x: -0.5, y: 0.9, width: 0.3, height: 0.3 }, 100, 100);
    assert.equal(result, null, "negative x with small width should be null after clamp");
    // A valid clamped case
    const result2 = normalizedToPixels({ x: -0.1, y: 0.5, width: 0.8, height: 0.6 }, 100, 100);
    assert.ok(result2);
    assert.equal(result2.x, 0);
    assert.equal(result2.y, 50);
  });

  it("returns null for zero-area crop", () => {
    // Edge case: both x and x+width clamp to same value
    const result = normalizedToPixels({ x: 1.0, y: 0, width: 0, height: 0.5 }, 100, 100);
    assert.equal(result, null);
  });
});

// clampPixels(rect, imageWidth, imageHeight): pixel-space counterpart of the clamp above.
describe("clampPixels", () => {
  it("clamps pixel coordinates to image bounds", () => {
    const result = clampPixels({ x: -10, y: 50, width: 200, height: 100 }, 100, 200);
    assert.ok(result);
    assert.equal(result.x, 0);
    assert.equal(result.y, 50);
    assert.equal(result.width, 100);
    assert.equal(result.height, 100);
  });

  it("returns null for zero-area after clamping", () => {
    const result = clampPixels({ x: 200, y: 200, width: 10, height: 10 }, 100, 100);
    assert.equal(result, null);
  });

  it("handles valid crop within bounds", () => {
    const result = clampPixels({ x: 10, y: 20, width: 30, height: 40 }, 100, 100);
    assert.ok(result);
    assert.deepEqual(result, { x: 10, y: 20, width: 30, height: 40 });
  });
});

// resolveCropEntry: accepts the region / normalized / pixels forms and throws on zero-area crops.
describe("resolveCropEntry", () => {
  it("resolves region crop", () => {
    const result = resolveCropEntry({ image_index: 0, region: "top-left" }, 1000, 1000);
    assert.equal(result.x, 0);
    assert.equal(result.y, 0);
    assert.equal(result.width, 500);
    assert.equal(result.height, 500);
  });

  it("resolves normalized crop", () => {
    const result = resolveCropEntry(
      { image_index: 0, normalized: { x: 0.25, y: 0.25, width: 0.5, height: 0.5 } },
      1000,
      1000,
    );
    assert.equal(result.x, 250);
    assert.equal(result.y, 250);
    assert.equal(result.width, 500);
    assert.equal(result.height, 500);
  });

  it("resolves pixel crop", () => {
    const result = resolveCropEntry(
      { image_index: 0, pixels: { x: 100, y: 200, width: 300, height: 400 } },
      1000,
      1000,
    );
    assert.deepEqual(result, { x: 100, y: 200, width: 300, height: 400 });
  });

  it("clamps pixel crop to image bounds", () => {
    const result = resolveCropEntry(
      { image_index: 0, pixels: { x: 900, y: 900, width: 200, height: 200 } },
      1000,
      1000,
    );
    assert.equal(result.width, 100);
    assert.equal(result.height, 100);
  });

  it("throws for zero-area normalized crop", () => {
    assert.throws(
      () => resolveCropEntry({ image_index: 0, normalized: { x: 1.0, y: 1.0, width: 0, height: 0 } }, 100, 100),
      /zero area/,
    );
  });

  it("throws for zero-area pixel crop", () => {
    assert.throws(
      () => resolveCropEntry({ image_index: 0, pixels: { x: 200, y: 200, width: 10, height: 10 } }, 100, 100),
      /zero area/,
    );
  });
});

// cropSignature: comma-joined "x,y,width,height" string for a crop rectangle.
describe("cropSignature", () => {
  it("formats x,y,width,height", () => {
    assert.equal(cropSignature({ x: 10, y: 20, width: 30, height: 40 }), "10,20,30,40");
  });
});

// LRUCache: bounded cache where get() refreshes recency and overflow evicts the oldest entry.
describe("LRUCache", () => {
  it("stores and retrieves values", () => {
    const cache = new LRUCache(3);
    cache.set("a", 1);
    assert.equal(cache.get("a"), 1);
  });

  it("evicts oldest when over capacity", () => {
    const cache = new LRUCache(2);
    cache.set("a", 1);
    cache.set("b", 2);
    cache.set("c", 3); // evicts "a"
    assert.equal(cache.get("a"), undefined);
    assert.equal(cache.get("b"), 2);
    assert.equal(cache.get("c"), 3);
  });

  it("renews entry on get", () => {
    const cache = new LRUCache(2);
    cache.set("a", 1);
    cache.set("b", 2);
    cache.get("a"); // "a" is now most recent
    cache.set("c", 3); // evicts "b" instead of "a"
    assert.equal(cache.get("a"), 1);
    assert.equal(cache.get("b"), undefined);
  });

  it("reports size", () => {
    const cache = new LRUCache(10);
    assert.equal(cache.size, 0);
    cache.set("x", 1);
    assert.equal(cache.size, 1);
  });

  it("clear removes all entries", () => {
    const cache = new LRUCache(10);
    cache.set("a", 1);
    cache.clear();
    assert.equal(cache.size, 0);
    assert.equal(cache.get("a"), undefined);
  });

  it("resize shrinks the cache and evicts excess", () => {
    const cache = new LRUCache(5);
    for (let i = 0; i < 5; i++) cache.set(`k${i}`, i);
    assert.equal(cache.size, 5);
    cache.resize(2);
    assert.equal(cache.size, 2);
    assert.equal(cache.maxSize, 2);
    // Oldest entries should be evicted
    assert.equal(cache.get("k0"), undefined);
    assert.equal(cache.get("k1"), undefined);
    assert.equal(cache.get("k2"), undefined);
    // Newest should survive
    assert.equal(cache.get("k3"), 3);
    assert.equal(cache.get("k4"), 4);
  });

  it("resize to larger does not lose entries", () => {
    const cache = new LRUCache(3);
    cache.set("a", 1);
    cache.set("b", 2);
    cache.resize(10);
    assert.equal(cache.size, 2);
    assert.equal(cache.get("a"), 1);
    assert.equal(cache.get("b"), 2);
  });
});

// extractDimensions: reads width/height from raw image bytes; undefined when unrecognized.
describe("extractDimensions", () => {
  it("extracts dimensions from a PNG buffer", () => {
    // TINY_PNG is 1×1
    const dims = extractDimensions(TINY_PNG);
    assert.ok(dims, "should return dimensions for valid PNG");
    assert.equal(dims.width, 1);
    assert.equal(dims.height, 1);
  });

  it("returns undefined for invalid data", () => {
    const dims = extractDimensions(Buffer.from("not an image"));
    assert.equal(dims, undefined);
  });
});

// buildVideoDescriptionFence(hash, file, mime, text): attributes and body must appear in the fence.
describe("buildVideoDescriptionFence", () => {
  it("builds video fence with file, hash, and mime attributes", () => {
    const fence = buildVideoDescriptionFence("abc123", "clip.mp4", "video/mp4", "Hello world");
    assert.ok(fence.includes('file="clip.mp4"'));
    assert.ok(fence.includes('hash="abc123"'));
    assert.ok(fence.includes('mime="video/mp4"'));
assert.ok(fence.includes("Hello world")); }); }); describe("buildVideoEmptyResponseError", () => { it("returns actionable guidance for providers that accept video but return no text", () => { const message = buildVideoEmptyResponseError("xai", "grok-4.3"); assert.ok(message.includes("empty response from xai/grok-4.3")); assert.ok(message.includes("provider accepted the request but returned no text")); assert.ok(message.includes("native xAI STT or Files/Responses path")); assert.ok(message.includes("shorter clip")); assert.ok(message.includes("smaller/transcoded video")); assert.ok(message.includes("Gemini")); }); }); describe("xAI native media helpers", () => { it("detects transcription requests", () => { assert.equal(isTranscriptionRequest("Transcribe this video with timestamps"), true); assert.equal(isTranscriptionRequest("make SRT captions"), true); assert.equal(isTranscriptionRequest("summarize visual scenes"), false); }); it("detects xAI provider aliases", () => { assert.equal(isXaiProvider("xai"), true); assert.equal(isXaiProvider("x-ai"), true); assert.equal(isXaiProvider("google"), false); }); it("formats xAI STT words into timestamped transcript", () => { const formatted = formatXaiSttTranscript({ text: "Hello world.", language: "English", duration: 1.5, words: [ { text: "Hello", start: 0.1, end: 0.5 }, { text: "world.", start: 0.6, end: 1.2 }, ], }, "clip.mp4"); assert.ok(formatted.includes("xAI Speech-to-Text transcription for clip.mp4")); assert.ok(formatted.includes("Detected language: English")); assert.ok(formatted.includes("[00:00")); assert.ok(formatted.includes("Hello world.")); }); it("extracts output text from xAI Responses objects", () => { assert.equal(extractXaiResponsesText({ output_text: "direct" }), "direct"); assert.equal(extractXaiResponsesText({ output: [{ type: "message", content: [{ type: "output_text", text: "nested" }] }] }), "nested"); }); }); describe("buildVideoProxySection", () => { it("instructs downstream agents to use 
injected video analysis instead of local transcription tools", () => { const section = buildVideoProxySection( 1, "x-ai", "grok-4.3", buildVideoDescriptionFence("abc123", "clip.mp4", "video/mp4", "[00:00] hello"), ); assert.ok(section.includes("already analyzed the media")); assert.ok(section.includes("answer from this injected context")); assert.ok(section.includes("Do not run local media-processing or transcription tools")); assert.ok(section.includes("ffmpeg")); assert.ok(section.includes("Whisper")); assert.ok(section.includes("Python")); assert.ok(section.includes("Only use external/local tools")); assert.ok(section.includes(" { it("builds fence with metadata attributes", () => { const fence = buildDescriptionFence("abc123", "A screenshot", { width: 1920, height: 1080, filename: "screen.png" }); assert.ok(fence.startsWith("")); }); it("includes crop_origin when cropped", () => { const fence = buildDescriptionFence("abc123", "Detail", { width: 3840, height: 2160 }, { x: 1840, y: 120, width: 840, height: 360 }); assert.ok(fence.includes('#crop:1840,120,840,360')); assert.ok(fence.includes('crop_origin="1840,120"')); assert.ok(fence.includes('width="840"')); assert.ok(fence.includes('height="360"')); }); }); describe("buildAnalysisFence", () => { it("builds fence with grounding_format", () => { const fence = buildAnalysisFence("abc", "Analysis", { width: 100, height: 100 }, undefined, "qwen_pixels"); assert.ok(fence.includes('grounding_format="qwen_pixels"')); }); it("omits grounding_format when undefined", () => { const fence = buildAnalysisFence("abc", "Analysis", { width: 100, height: 100 }); assert.ok(!fence.includes("grounding_format")); }); }); describe("fenceUntrusted (all three tags)", () => { it("neutralizes vision_proxy_analysis tags", () => { const out = fenceUntrusted('content'); assert.ok(!out.includes("")); assert.ok(!out.includes("")); }); it("neutralizes vision_proxy_joint_description tags", () => { const out = fenceUntrusted('content'); 
assert.ok(!out.includes("")); }); it("neutralizes vision_proxy_description tags (unchanged)", () => { const out = fenceUntrusted('content'); assert.ok(!out.includes("")); }); it("neutralizes both < and > in tags", () => { const out = fenceUntrusted('test'); // Neither raw < nor raw > should appear in the tag parts const tagMatch = out.match(/vision_proxy_description/g); assert.ok(tagMatch); // The opening bracket of each tag should be neutralized assert.ok(!out.includes(" { const out = fenceUntrusted(''); assert.ok(!out.includes(""), "closing tag with space should be neutralized"); }); it("neutralizes tags with attributes", () => { const out = fenceUntrusted(''); assert.ok(!out.includes(" { it("escapes double quotes", () => { assert.equal(escapeAttr('file"name.png'), "file"name.png"); }); it("escapes angle brackets", () => { assert.equal(escapeAttr("ac"), "a<b>c"); }); it("escapes ampersands", () => { assert.equal(escapeAttr("a&b"), "a&b"); }); it("leaves safe characters intact", () => { assert.equal(escapeAttr("photo.png"), "photo.png"); }); it("handles empty string", () => { assert.equal(escapeAttr(""), ""); }); }); describe("getGroundingFormat", () => { it("returns format for known model", () => { const fmt = getGroundingFormat(DEFAULT_CONFIG, "Qwen", "Qwen2.5-VL-7B-Instruct"); assert.equal(fmt, "qwen_pixels"); }); it("returns 'none' for unknown model", () => { const fmt = getGroundingFormat(DEFAULT_CONFIG, "anthropic", "claude-sonnet-4-5"); assert.equal(fmt, "none"); }); }); describe("readEnvOverrides (1.4.0 fields)", () => { it("reads PI_VISION_PROXY_TOOL", () => { assert.equal(readEnvOverrides({ PI_VISION_PROXY_TOOL: "on" }).tool, "on"); assert.equal(readEnvOverrides({ PI_VISION_PROXY_TOOL: "off" }).tool, "off"); assert.equal(readEnvOverrides({ PI_VISION_PROXY_TOOL: "bogus" }).tool, undefined); }); it("reads PI_VISION_PROXY_MAX_IMAGES_PER_CALL", () => { assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_IMAGES_PER_CALL: "5" }).maxImagesPerCall, 5); 
assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_IMAGES_PER_CALL: "0" }).maxImagesPerCall, undefined);
    assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_IMAGES_PER_CALL: "21" }).maxImagesPerCall, undefined);
  });

  it("reads PI_VISION_PROXY_MAX_BATCH", () => {
    assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_BATCH: "3" }).maxBatch, 3);
    assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_BATCH: "0" }).maxBatch, undefined);
  });

  it("reads PI_VISION_PROXY_CACHE_SIZE", () => {
    assert.equal(readEnvOverrides({ PI_VISION_PROXY_CACHE_SIZE: "100" }).cacheSize, 100);
    assert.equal(readEnvOverrides({ PI_VISION_PROXY_CACHE_SIZE: "501" }).cacheSize, undefined);
  });

  it("reads PI_VISION_PROXY_PHASH_THRESHOLD", () => {
    assert.equal(readEnvOverrides({ PI_VISION_PROXY_PHASH_THRESHOLD: "0.9" }).pHashSimilarityThreshold, 0.9);
    assert.equal(readEnvOverrides({ PI_VISION_PROXY_PHASH_THRESHOLD: "1.5" }).pHashSimilarityThreshold, undefined);
  });
});

// sanitize: fields introduced in 1.4.0 get defaults when absent and are range-checked.
describe("sanitize (1.4.0 fields)", () => {
  it("defaults new fields when missing", () => {
    // Cast is deliberate: the object omits the 1.4.0 fields to exercise defaulting.
    const result = sanitize({
      mode: "fallback",
      provider: "anthropic",
      modelId: "claude-sonnet-4-5",
      systemPrompt: "test",
      includeContext: true,
    } as VisionConfig);
    assert.equal(result.tool, "on");
    assert.equal(result.maxImagesPerCall, 10);
    assert.equal(result.maxBatch, 4);
    assert.equal(result.cacheSize, 50);
    assert.equal(result.pHashSimilarityThreshold, 0.8);
    assert.ok(result.groundingModels);
  });

  it("validates maxImagesPerCall range", () => {
    const bad = sanitize({ ...DEFAULT_CONFIG, maxImagesPerCall: 0 });
    assert.equal(bad.maxImagesPerCall, 10); // reset to default
    const good = sanitize({ ...DEFAULT_CONFIG, maxImagesPerCall: 15 });
    assert.equal(good.maxImagesPerCall, 15);
  });
});

// readImageFileWithReason also reports the file's basename as `filename`.
describe("readImageFileWithReason (basename)", () => {
  it("returns filename (basename)", async () => {
    const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
    const file = join(dir, "test-image.png");
    try {
      // Fixture creation is inside `try` so the temp dir is removed even if it fails.
      await writeFile(file, TINY_PNG);
      const r = await readImageFileWithReason(file);
      assert.equal(r.filename, "test-image.png");
    } finally {
      await rm(dir, { recursive: true, force: true });
    }
  });
});

// ── Create a 10×10 solid-colour PNG for crop tests ────────────────────────
/**
 * Builds a 10×10 PNG fixture with the `imagescript` package (loaded lazily so
 * only tests that need real pixels import it).
 *
 * @returns the encoded PNG bytes
 */
async function create10x10Png(): Promise<Buffer> {
  const { Image } = await import("imagescript");
  const img = new Image(10, 10);
  // Fill with a solid red-ish colour so we have real pixels
  for (let y = 0; y < 10; y++) {
    for (let x = 0; x < 10; x++) {
      // Coordinates are passed 1-based (x + 1, y + 1).
      img.setPixelAt(x + 1, y + 1, 0xff0000ff); // RGBA red, fully opaque
    }
  }
  const encoded = await img.encode(1);
  return Buffer.from(encoded);
}

// cropImage(buffer, crop, mimeType): resolves to the cropped bytes, or null on failure/timeout.
// Several tests toggle PI_VISION_PROXY_DECODE_* env vars and restore them in `finally`.
describe("cropImage (ImageScript)", () => {
  // Tear down the crop workers once the suite finishes.
  after(async () => {
    await shutdownCropWorkers();
  });

  it("crops a 10×10 PNG to a 5×5 region", async () => {
    const png = await create10x10Png();
    const crop = { x: 2, y: 3, width: 5, height: 5 };
    const result = await cropImage(png, crop, "image/png");
    assert.ok(result, "crop should succeed");
    assert.ok(result.length > 0, "result should have bytes");
    // Verify the cropped image has correct dimensions
    const dims = extractDimensions(result);
    assert.ok(dims, "should extract dimensions from cropped image");
    assert.equal(dims.width, 5);
    assert.equal(dims.height, 5);
  });

  // Title fixed: the body accepts either null or a valid image, it only forbids throwing.
  it("does not throw for out-of-bounds crop", async () => {
    const png = await create10x10Png();
    const crop = { x: 8, y: 8, width: 10, height: 10 };
    const result = await cropImage(png, crop, "image/png");
    // ImageScript may clamp or fail — either way it shouldn't throw
    // If it returns something, it should be valid
    if (result) {
      const dims = extractDimensions(result);
      assert.ok(dims, "cropped result should be valid");
    }
  });

  it("encodes as JPEG when mimeType is image/jpeg", async () => {
    const png = await create10x10Png();
    const crop = { x: 0, y: 0, width: 10, height: 10 };
    const result = await cropImage(png, crop, "image/jpeg");
    assert.ok(result, "crop should succeed");
    // JPEG should start with FF D8
    assert.equal(result[0], 0xff);
    assert.equal(result[1], 0xd8);
  });

  it("succeeds within a generous decode timeout (env override is honoured)", async () => {
    const prev = process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
    process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = "10000";
    try {
      const png = await create10x10Png();
      const result = await cropImage(png, { x: 0, y: 0, width: 10, height: 10 }, "image/png");
      assert.ok(result, "crop should succeed with a generous timeout");
    } finally {
      if (prev === undefined) delete process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
      else process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = prev;
    }
  });

  it("returns null (no throw) for undecodable garbage bytes", async () => {
    // Starts with the PNG magic prefix but carries no valid chunks.
    const garbage = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05]);
    const result = await cropImage(garbage, { x: 0, y: 0, width: 5, height: 5 }, "image/png");
    assert.equal(result, null);
  });

  it("falls back to the in-thread path when the worker is disabled", async () => {
    const prev = process.env.PI_VISION_PROXY_DECODE_WORKER;
    process.env.PI_VISION_PROXY_DECODE_WORKER = "0";
    try {
      const png = await create10x10Png();
      const result = await cropImage(png, { x: 2, y: 3, width: 5, height: 5 }, "image/png");
      assert.ok(result, "in-thread fallback should still crop");
      const dims = extractDimensions(result);
      assert.ok(dims && dims.width === 5 && dims.height === 5, "fallback crop should have correct dims");
    } finally {
      if (prev === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER;
      else process.env.PI_VISION_PROXY_DECODE_WORKER = prev;
    }
  });

  it("handles several sequential crops (worker pool reuse)", async () => {
    const png = await create10x10Png();
    for (let i = 0; i < 5; i++) {
      const result = await cropImage(png, { x: 0, y: 0, width: 5, height: 5 }, "image/png");
      assert.ok(result, `crop ${i} should succeed`);
      const dims = extractDimensions(result);
      assert.ok(dims && dims.width === 5 && dims.height === 5, `crop ${i} dims`);
    }
  });

  it("works with pooling disabled (spawn-per-call)", async () => {
    const prev = process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
    process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = "0";
    try {
      const png = await create10x10Png();
      const a = await cropImage(png, { x: 0, y: 0, width: 5, height: 5 }, "image/png");
      const b = await cropImage(png, { x: 1, y: 1, width: 4, height: 4 }, "image/png");
      assert.ok(a && b, "both crops should succeed without pooling");
    } finally {
      if (prev === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
      else process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = prev;
    }
  });

  it("hard-terminates the worker on timeout (returns null, does not hang)", async () => {
    // Force pool=0 so a *fresh* worker is spawned (never a warmed pooled one).
    // Spinning up a thread + loading ImageScript + instantiating the WASM codec
    // is reliably far slower than the 1ms timeout regardless of machine speed,
    // so the main-thread timer fires and terminate()s the worker — proving the
    // timeout is a hard limit, without depending on wall-clock scheduling luck.
    const prevWorker = process.env.PI_VISION_PROXY_DECODE_WORKER;
    const prevPool = process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
    const prevTimeout = process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
    process.env.PI_VISION_PROXY_DECODE_WORKER = "1";
    process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = "0";
    process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = "1";
    try {
      const png = await create10x10Png();
      const started = Date.now();
      const result = await cropImage(png, { x: 0, y: 0, width: 10, height: 10 }, "image/png");
      assert.equal(result, null, "timed-out crop should return null");
      assert.ok(Date.now() - started < 5000, "should return promptly, not hang");
    } finally {
      if (prevWorker === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER;
      else process.env.PI_VISION_PROXY_DECODE_WORKER = prevWorker;
      if (prevPool === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
      else process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = prevPool;
      if (prevTimeout === undefined) delete process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
      else process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = prevTimeout;
    }
  });
});

// Buffer ↔ pi-ai image object conversion helpers.
describe("piAiImageToBuffer / bufferToPiAiImage", () => {
  it("round-trips base64 data", () => {
    const original = Buffer.from("hello world");
    const piAiImg = bufferToPiAiImage(original, "image/png");
    assert.equal(piAiImg.type, "image");
    assert.equal(piAiImg.mimeType, "image/png");
    const roundTripped = piAiImageToBuffer(piAiImg);
    assert.deepEqual(roundTripped, original);
  });

  it("defaults to image/png mimeType", () => {
    const piAiImg = bufferToPiAiImage(Buffer.alloc(0));
    assert.equal(piAiImg.mimeType, "image/png");
  });
});

// computePHash: resolves to a hex string, or null when no hash can be produced.
describe("computePHash", () => {
  it("returns a hex hash string for a valid image", async (t) => {
    const png = await create10x10Png();
    const hash = await computePHash(png);
    // imghash may or may not be available; if it is, we get a hex string.
    // A null hash is reported as skipped rather than passing with no assertion.
    if (hash === null) {
      t.skip("computePHash returned null (hash backend unavailable)");
      return;
    }
    assert.ok(/^[0-9a-f]+$/i.test(hash), `hash should be hex: ${hash}`);
  });
});

// hammingDistance: bit-level distance between two hex hash strings.
describe("hammingDistance", () => {
  it("returns 0 for identical hashes", () => {
    assert.equal(hammingDistance("0000", "0000"), 0);
  });

  it("returns correct distance for differing hashes", () => {
    // 0 = 0000, f = 1111 → 4 bits differ per hex char
    assert.equal(hammingDistance("0", "f"), 4);
    // 0 = 0000, 1 = 0001 → 1 bit differs
    assert.equal(hammingDistance("0", "1"), 1);
  });

  it("returns Infinity for null inputs", () => {
    assert.equal(hammingDistance(null, "abc"), Infinity);
    assert.equal(hammingDistance("abc", null), Infinity);
    assert.equal(hammingDistance(null, null), Infinity);
  });

  it("handles unequal length hashes", () => {
    // Compare only up to shorter length
    const dist = hammingDistance("00", "ff00");
    assert.equal(dist, 8); // only first 2 hex chars compared
  });
});

import { parseDescribeArgs } from "../internal.ts";

// parseDescribeArgs(input): returns a parsed args object, or an error string on invalid input.
// Each success test asserts the non-string case first, passing the error text as the message.
describe("parseDescribeArgs (describe)", () => {
  it("parses basic describe with single image", () => {
    const result = parseDescribeArgs("/path/to/image.png");
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.deepEqual(result.images, ["/path/to/image.png"]);
      assert.equal(result.save, false);
      assert.equal(result.question, undefined);
      assert.equal(result.model, undefined);
      assert.equal(result.crops, undefined);
    }
  });

  it("parses multiple images with --question", () => {
    const result = parseDescribeArgs('img1.png img2.png --question "What is different?"');
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.deepEqual(result.images, ["img1.png", "img2.png"]);
      assert.equal(result.question, "What is different?");
    }
  });

  it("parses --crop with region form", () => {
    const result = parseDescribeArgs("image.png --crop 0:r=top-right");
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.deepEqual(result.crops, [{ image_index: 0, region: "top-right" }]);
    }
  });

  it("parses --crop with normalized form", () => {
    const result = parseDescribeArgs("image.png --crop 0:n=0.1,0.2,0.5,0.6");
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.deepEqual(result.crops, [{ image_index: 0, normalized: { x: 0.1, y: 0.2, width: 0.5, height: 0.6 } }]);
    }
  });

  it("parses --crop with pixel form", () => {
    const result = parseDescribeArgs("image.png --crop 0:p=100,200,300,400");
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.deepEqual(result.crops, [{ image_index: 0, pixels: { x: 100, y: 200, width: 300, height: 400 } }]);
    }
  });

  it("parses --save flag", () => {
    const result = parseDescribeArgs("image.png --save");
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.equal(result.save, true);
    }
  });

  it("parses --model override", () => {
    const result = parseDescribeArgs("image.png --model Qwen/Qwen2.5-VL-7B");
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.equal(result.model, "Qwen/Qwen2.5-VL-7B");
    }
  });

  it("parses full combined command", () => {
    const result = parseDescribeArgs('a.png b.png --question "Compare them" --crop 0:r=center --crop 1:n=0,0,0.5,0.5 --model Qwen/Qwen2.5-VL-7B --save');
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.deepEqual(result.images, ["a.png", "b.png"]);
      assert.equal(result.question, "Compare them");
      assert.equal(result.save, true);
      assert.equal(result.model, "Qwen/Qwen2.5-VL-7B");
      assert.equal(result.crops!.length, 2);
      assert.equal(result.crops![0].image_index, 0);
      assert.equal(result.crops![1].image_index, 1);
    }
  });

  it("returns error for empty input", () => {
    const result = parseDescribeArgs("");
    assert.equal(typeof result, "string");
    assert.ok((result as string).includes("Usage"));
  });

  it("returns error for unknown region", () => {
    const result = parseDescribeArgs("image.png --crop 0:r=invalid");
    assert.equal(typeof result, "string");
    assert.ok((result as string).includes("unknown region"));
  });

  it("returns error for bad crop form", () => {
    const result = parseDescribeArgs("image.png --crop 0:bad=form");
    assert.equal(typeof result, "string");
    assert.ok((result as string).includes("unknown crop form"));
  });

  it("returns error for missing --question value", () => {
    const result = parseDescribeArgs("image.png --question");
    assert.equal(typeof result, "string");
    assert.ok((result as string).includes("--question requires"));
  });

  it("returns error for unknown flag", () => {
    const result = parseDescribeArgs("image.png --bogus");
    assert.equal(typeof result, "string");
    assert.ok((result as string).includes("unknown flag"));
  });
});

// parseDescribeArgs(input, true): the redescribe variant of the same parser.
describe("parseDescribeArgs (redescribe)", () => {
  it("parses redescribe with single image", () => {
    const result = parseDescribeArgs("image.png", true);
    assert.ok(typeof result !== "string", result as string);
    if (typeof result !== "string") {
      assert.deepEqual(result.images, ["image.png"]);
assert.equal(result.save, true); // implied
    }
  });

  it("returns error for redescribe with --question", () => {
    const outcome = parseDescribeArgs('image.png --question "test"', true);
    assert.equal(typeof outcome, "string");
    assert.ok((outcome as string).includes("--question is not valid"));
  });

  it("returns error for redescribe with --crop", () => {
    const outcome = parseDescribeArgs("image.png --crop 0:r=center", true);
    assert.equal(typeof outcome, "string");
    assert.ok((outcome as string).includes("--crop is not valid"));
  });

  it("returns error for redescribe with --save", () => {
    const outcome = parseDescribeArgs("image.png --save", true);
    assert.equal(typeof outcome, "string");
    assert.ok((outcome as string).includes("--save is implied"));
  });

  it("allows --model in redescribe", () => {
    const outcome = parseDescribeArgs("image.png --model Qwen/Qwen2.5-VL-7B", true);
    assert.ok(typeof outcome !== "string", outcome as string);
    if (typeof outcome !== "string") {
      assert.equal(outcome.model, "Qwen/Qwen2.5-VL-7B");
      assert.equal(outcome.save, true);
    }
  });
});

import {
  buildJointDescriptionFence,
  buildAdaptiveJointPrompt,
  extractVersion,
  generateFilenameHints,
} from "../internal.ts";

describe("buildJointDescriptionFence", () => {
  it("builds joint fence with dimensions JSON", () => {
    const pair = [
      { hash: "aaa", meta: { width: 1920, height: 1080, filename: "before.png" } },
      { hash: "bbb", meta: { width: 1920, height: 1080, filename: "after.png" } },
    ];
    const fence = buildJointDescriptionFence(pair, "Images differ in sidebar.");
    // NOTE(review): startsWith("") is true for every string, so this
    // assertion cannot fail — the expected prefix looks like it was lost;
    // confirm the intended opening tag.
    assert.ok(fence.startsWith(""));
  });

  it("includes grounding_format when provided", () => {
    const single = [{ hash: "abc", meta: { width: 100, height: 100 } }];
    const fence = buildJointDescriptionFence(single, "desc", "qwen_pixels");
    assert.ok(fence.includes('grounding_format="qwen_pixels"'));
  });

  it("omits grounding_format when 'none'", () => {
    const single = [{ hash: "abc", meta: { width: 100, height: 100 } }];
    const fence = buildJointDescriptionFence(single, "desc", "none");
    assert.ok(!fence.includes("grounding_format"));
  });

  it("handles missing meta gracefully", () => {
    const fence = buildJointDescriptionFence([{ hash: "abc" }], "desc");
    assert.ok(fence.includes('"image":"abc"'));
    assert.ok(!fence.includes("width"));
  });
});

describe("buildAdaptiveJointPrompt", () => {
  it("includes image labels and comparison instructions", () => {
    const prompt = buildAdaptiveJointPrompt(
      [
        { hash: "a", meta: { width: 800, height: 600, filename: "img1.png" } },
        { hash: "b", meta: { width: 1024, height: 768, filename: "img2.png" } },
      ],
      "What changed?",
    );
    const expectedFragments = [
      "2 images",
      "800x600",
      "1024x768",
      "img1.png",
      "What changed?",
      "comparison",
    ];
    for (const fragment of expectedFragments) {
      assert.ok(prompt.includes(fragment));
    }
  });

  it("includes hints when provided", () => {
    const prompt = buildAdaptiveJointPrompt([{ hash: "a" }, { hash: "b" }], "describe", [
      "before/after pair",
    ]);
    assert.ok(prompt.includes("before/after pair"));
    assert.ok(prompt.includes("Structural hints"));
  });

  it("omits hint block when no hints", () => {
    const prompt = buildAdaptiveJointPrompt([{ hash: "a" }, { hash: "b" }], "describe");
    assert.ok(!prompt.includes("Structural hints"));
  });
});

describe("extractVersion", () => {
  it("extracts v-prefixed version", () => {
    assert.deepEqual(extractVersion("mockup_v2.png"), { prefix: "mockup_v", version: 2 });
  });

  it("extracts decimal version", () => {
    assert.deepEqual(extractVersion("draft_v1.2.png"), { prefix: "draft_v", version: 1.2 });
  });

  it("extracts non-prefixed version", () => {
    assert.deepEqual(extractVersion("app3.png"), { prefix: "app", version: 3 });
  });

  it("returns null for no version", () => {
    assert.equal(extractVersion("screenshot.png"), null);
  });

  it("returns null for version-only filename", () => {
    assert.equal(extractVersion("3.png"), null);
  });
});

describe("generateFilenameHints", () => {
  it("detects before/after pair", () => {
    assert.ok(generateFilenameHints(["before.png", "after.png"]).includes("before/after pair"));
  });

  it("detects old/new pair", () => {
    assert.ok(generateFilenameHints(["old.png", "new.png"]).includes("old/new pair"));
  });

  it("detects versioned sequence", () => {
    const found = generateFilenameHints(["mockup_v2.png", "mockup_v4.png"]);
    assert.ok(found.some((hint) => hint.includes("versioned sequence")));
  });

  it("detects numbered underscore sequence", () => {
    assert.ok(generateFilenameHints(["frame_1.png", "frame_2.png"]).includes("numbered sequence"));
  });

  it("detects numbered dash sequence", () => {
    assert.ok(generateFilenameHints(["frame-1.png", "frame-2.png"]).includes("numbered sequence"));
  });

  it("detects date-ordered sequence", () => {
    const found = generateFilenameHints(["2026-05-01_mockup.png", "2026-05-03_mockup.png"]);
    assert.ok(found.includes("time-ordered sequence"));
  });

  it("returns empty for no pattern", () => {
    assert.deepEqual(generateFilenameHints(["cat.png", "dog.png"]), []);
  });

  it("returns empty for single image", () => {
    assert.deepEqual(generateFilenameHints(["before.png"]), []);
  });
});

import {
  isGroundingExcluded,
  parseGroundingFormat,
  VALID_GROUNDING_FORMATS,
} from "../internal.ts";

describe("isGroundingExcluded", () => {
  it("excludes claude models", () => {
    assert.equal(isGroundingExcluded("anthropic/claude-sonnet-4-5"), true);
  });

  it("excludes gpt-4o", () => {
    assert.equal(isGroundingExcluded("openai/gpt-4o"), true);
  });

  it("excludes llama vision", () => {
    assert.equal(isGroundingExcluded("meta/llama-3.2-11b-vision"), true);
  });

  it("allows Qwen models", () => {
    assert.equal(isGroundingExcluded("Qwen/Qwen2.5-VL-7B-Instruct"), false);
  });

  it("allows unknown models", () => {
assert.equal(isGroundingExcluded("some/vendor-model"), false);
  });
});

describe("parseGroundingFormat", () => {
  it("parses valid formats", () => {
    // Every valid format name is expected to parse to itself.
    const validNames = [
      "qwen_pixels",
      "molmo_points",
      "deepseek_bbox",
      "internvl_pixels",
      "gemini_normalized_1000",
    ];
    for (const name of validNames) {
      assert.equal(parseGroundingFormat(name), name);
    }
  });

  it("returns null for invalid format", () => {
    assert.equal(parseGroundingFormat("invalid"), null);
    assert.equal(parseGroundingFormat("none"), null);
  });
});

describe("VALID_GROUNDING_FORMATS", () => {
  it("contains expected formats", () => {
    assert.ok(VALID_GROUNDING_FORMATS.includes("qwen_pixels"));
    assert.ok(VALID_GROUNDING_FORMATS.includes("molmo_points"));
    assert.equal(VALID_GROUNDING_FORMATS.length, 5);
  });
});

// ── Security-specific tests ──────────────────────────────────────────────

describe("Security: path traversal rejection", () => {
  it("extractCandidateImagePaths may detect paths with .., but before_agent_start rejects them", () => {
    // The regex is permissive — it may extract paths with ..
    // The .. check in before_agent_start is the defense layer
    const paths = extractCandidateImagePaths(
      "Check this image: /tmp/../etc/shadow.png",
    );
    // This test documents that extractCandidateImagePaths itself does not
    // filter "..". The previous `paths.length >= 0` check could never fail;
    // instead verify the call completes and yields an array of strings,
    // whether or not the traversal path was matched.
    assert.ok(Array.isArray(paths), "regex may or may not match — .. filtering is in the handler");
    for (const candidate of paths) {
      assert.equal(typeof candidate, "string", "every extracted candidate should be a string");
    }
  });

  it("stripImagePaths escapes regex metacharacters safely", () => {
    // A path containing regex metacharacters should not cause errors
    const result = stripImagePaths(
      "Image at /tmp/test(file).png",
      ["/tmp/test(file).png"],
    );
    assert.ok(!result.includes("/tmp/test(file).png"));
    assert.ok(result.includes(IMAGE_PATH_PLACEHOLDER));
  });

  it("stripImagePaths handles path with $ and ^ safely", () => {
    const result = stripImagePaths(
      "/tmp/$test^.png",
      ["/tmp/$test^.png"],
    );
    assert.ok(!result.includes("/tmp/$test^.png"));
  });
});

describe("Security: fence injection resistance", () => {
  it("nested fence tags are neutralised", () => {
    // NOTE(review): this payload contains no fence tags at all, although the
    // test is about nested tags — the tag text appears to be missing from
    // the literals; confirm the intended payload.
    const malicious = 'Normal text' + 'Injected content' + '';
    const fence = buildDescriptionFence("abc", malicious);
    // Count actual closing tags — should be exactly 1 (at the end)
    const closings = fence.match(/<\/vision_proxy_description>/g);
    assert.equal(closings?.length, 1, "should have exactly 1 closing tag");
  });

  it("analysis fence with mixed injection types", () => {
    // NOTE(review): the payload and the needles below look truncated.
    // `includes("")` is true for every string, so the last two assertions
    // can never pass as written; confirm the intended tag strings.
    const malicious = 'x';
    const fence = buildAnalysisFence("abc", malicious);
    // fenceUntrusted neutralises ALL vision_proxy tags but not arbitrary HTML
    assert.ok(!fence.includes("<"), "closing tag should be neutralised");
    assert.ok(!fence.includes(""), "description tag should be neutralised");
    assert.ok(!fence.includes(""), "joint opening tag should be neutralised");
  });

  it("fenceUntrusted handles empty string", () => {
    assert.equal(fenceUntrusted(""), "");
  });

  it("fenceUntrusted handles non-ASCII content", () => {
    const text = "描述图片中的内容 🖼️ 画像の内容を説明";
    const safe = fenceUntrusted(text);
    assert.equal(safe, text, "non-ASCII should pass through unchanged");
  });
});

describe("Security: consent integrity", () => {
  it("consent entry without provider does not satisfy per-provider check", () => {
    const entries = [
      { type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true } },
    ];
    assert.equal(hasConsent(entries, "anthropic"), false);
assert.equal(hasConsent(entries, "openai"), false);
  });

  it("consent entry with wrong provider does not satisfy check", () => {
    const entries = [
      { type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true, provider: "anthropic" } },
    ];
    assert.equal(hasConsent(entries, "openai"), false);
  });

  it("consent entry with matching provider satisfies check", () => {
    const entries = [
      { type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true, provider: "anthropic" } },
    ];
    assert.equal(hasConsent(entries, "anthropic"), true);
  });

  it("most recent consent entry wins", () => {
    const entries = [
      { type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true, provider: "anthropic" } },
      { type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: false } },
    ];
    assert.equal(hasConsent(entries, "anthropic"), false);
  });
});

describe("Security: config sanitization", () => {
  it("rejects prototype-polluting keys from file config", () => {
    // A plain `"__proto__": {...}` entry in an object literal sets the new
    // object's prototype and creates no own property, so it never reaches
    // sanitize() as a key. The computed form creates a real own "__proto__"
    // property, which is what JSON.parse produces for a config file.
    const hostile = { ...DEFAULT_CONFIG, ["__proto__"]: { admin: true } } as any;
    assert.ok(Object.keys(hostile).includes("__proto__"), "fixture must carry an own __proto__ key");
    const cfg = sanitize(hostile);
    assert.equal(({} as any).admin, undefined);
    assert.equal((cfg as any).admin, undefined, "sanitized config must not inherit the hostile value");
    assert.equal(cfg.mode, "fallback"); // still valid
  });

  it("rejects invalid provider strings", () => {
    const cfg = sanitize({ ...DEFAULT_CONFIG, provider: "../../evil" });
    assert.equal(cfg.provider, DEFAULT_CONFIG.provider); // reset to default
  });

  it("rejects invalid modelId strings", () => {
    const cfg = sanitize({ ...DEFAULT_CONFIG, modelId: "model; rm -rf /" });
    assert.equal(cfg.modelId, DEFAULT_CONFIG.modelId); // reset to default
  });

  it("clamps out-of-range numeric values", () => {
    const cfg = sanitize({ ...DEFAULT_CONFIG, maxImagesPerCall: 9999, maxBatch: -1, cacheSize: 1e6 });
    assert.equal(cfg.maxImagesPerCall, DEFAULT_CONFIG.maxImagesPerCall);
    assert.equal(cfg.maxBatch, DEFAULT_CONFIG.maxBatch);
    assert.equal(cfg.cacheSize, DEFAULT_CONFIG.cacheSize);
  });
});

describe("Security: attribute escaping", () => {
  it("escapeAttr handles all XML-special characters", () => {
// NOTE(review): the literals in the next two assertions were not valid
    // TypeScript (unescaped quotes, no entities), so they are rebuilt here
    // in entity-escaped form. Confirm the exact entity spellings (e.g.
    // &quot; for the double quote) against escapeAttr's implementation.
    assert.equal(
      escapeAttr('<script>alert("xss")</script>'),
      "&lt;script&gt;alert(&quot;xss&quot;)&lt;/script&gt;",
    );
    assert.equal(escapeAttr("a&b"), "a&amp;b");
  });

  it("escapeAttr handles empty string", () => {
    assert.equal(escapeAttr(""), "");
  });

  it("escapeAttr neutralises null bytes (SEC-6)", () => {
    assert.equal(escapeAttr("before\x00after"), "before\uFFFDafter");
    assert.equal(escapeAttr("\x00"), "\uFFFD");
    // Null bytes in filename attribute context
    const fence = buildDescriptionFence("abc", "desc", { width: 1, height: 1, filename: "test\x00evil.png" });
    assert.ok(!fence.includes("\x00"), "fence should contain no null bytes");
    assert.ok(fence.includes("\uFFFD"), "null byte should be replaced with replacement char");
  });
});

describe("Security: telemetry sanitization (SEC-3)", () => {
  it("sanitizeForLog strips control characters", () => {
    assert.equal(sanitizeForLog("hello\x00world"), "helloworld");
    assert.equal(sanitizeForLog("bell\x07ring"), "bellring");
    assert.equal(sanitizeForLog("normal text"), "normal text");
    // Tab, LF, CR are safe and preserved
    assert.equal(sanitizeForLog("tab\there"), "tab\there");
    assert.equal(sanitizeForLog("line\nbreak"), "line\nbreak");
  });

  it("sanitizeForLog enforces length limit", () => {
    const long = "a".repeat(500);
    assert.equal(sanitizeForLog(long).length, 200);
    assert.equal(sanitizeForLog(long, 50).length, 50);
  });

  it("sanitizeForLog preserves Unicode", () => {
    const text = "描述 🖼️ 画像";
    assert.equal(sanitizeForLog(text), text);
  });

  it("sanitizeForLog handles empty string", () => {
    assert.equal(sanitizeForLog(""), "");
  });
});

describe("Security: persistent config key filtering (SEC-4)", () => {
  it("readPersistentFile filters unknown keys", async () => {
    const dir = await mkdtemp(join(os.tmpdir(), "vp-cfg-sec-"));
    try {
      // A non-computed `__proto__:` entry in an object literal sets the
      // prototype and is skipped by JSON.stringify, so the hostile key never
      // reached the file. The computed key creates an own property that is
      // serialised. "constructor" is added so the own-property checks that
      // follow have something to filter.
      const malicious = JSON.stringify({
        mode: "always",
        ["__proto__"]: { admin: true },
        constructor: { prototype: { admin: true } },
        unknownKey: "should be removed",
        provider: "anthropic",
      });
      assert.ok(malicious.includes('"__proto__"'), "fixture must contain a literal __proto__ key");
      await writeFile(join(dir, "vision-proxy.json"), malicious);
      const result = await readPersistentFile(dir) as any;
assert.equal(result.mode, "always");
      assert.equal(result.provider, "anthropic");
      assert.equal(result.unknownKey, undefined, "unknown key should be filtered");
      // Check own properties only — constructor is inherited from Object.prototype
      assert.ok(!Object.keys(result).includes("constructor"), "constructor should not be an own property");
      assert.ok(!Object.keys(result).includes("__proto__"), "__proto__ should not be an own property");
      // Verify prototype is not polluted
      assert.equal(({} as any).admin, undefined);
    } finally {
      await rm(dir, { recursive: true });
    }
  });

  it("readPersistentFile handles invalid JSON", async () => {
    const dir = await mkdtemp(join(os.tmpdir(), "vp-cfg-inv-"));
    try {
      await writeFile(join(dir, "vision-proxy.json"), "not json at all");
      const result = await readPersistentFile(dir);
      assert.deepEqual(result, {});
    } finally {
      await rm(dir, { recursive: true });
    }
  });
});

describe("Security: image decode bomb protection", () => {
  // The title used to claim that oversize dimensions are rejected, but the
  // body only checks that a normal image is accepted. The title now says
  // what is asserted; the rejection path is not exercised here.
  it("storeImageMeta accepts a normally sized image (oversize rejection is not exercised here)", async () => {
    // Can't easily create a real 16K×16K image, so only the accept path
    // for a small image is verified.
    const { Image } = await import("imagescript");
    const img = new Image(100, 100);
    const encoded = Buffer.from(await img.encode(1));
    const hash = "test-decode-bomb-normal";
    const store = createImageMetaStore();
    storeImageMeta(store, hash, encoded);
    const meta = store.get(hash);
    // Normal image should be accepted
    assert.ok(meta, "normal image should be stored");
  });
});

describe("Review fixes: hasConsent per-provider semantics", () => {
  it("revoking consent for provider A does not affect provider B", () => {
    const entries: Entry[] = [
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "anthropic" }),
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "google" }),
      customEntry(CUSTOM_TYPE_CONSENT, { granted: false, provider: "anthropic" }),
    ];
    assert.equal(hasConsent(entries, "anthropic"), false, "anthropic should be revoked");
    assert.equal(hasConsent(entries, "google"), true, "google should still be granted");
  });

  it("provider-less revoked consent blocks all providers", () => {
    const entries: Entry[] = [
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "anthropic" }),
      customEntry(CUSTOM_TYPE_CONSENT, { granted: false }), // global revoke
    ];
    assert.equal(hasConsent(entries, "anthropic"), false);
    assert.equal(hasConsent(entries, "google"), false);
  });

  it("provider-less granted does not satisfy per-provider check", () => {
    const entries: Entry[] = [
      customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
    ];
    assert.equal(hasConsent(entries, "anthropic"), false, "global grant should not satisfy per-provider");
    assert.equal(hasConsent(entries), true, "global check should see the grant");
  });
});

describe("Review fixes: grounding format validation in sanitize()", () => {
  it("strips invalid grounding format values", () => {
    const config = {
      ...DEFAULT_CONFIG,
      groundingModels: {
        "test/model": { format: "invalid_format" },
        "anthropic/claude-sonnet-4-5": { format: "qwen_pixels" },
      },
    };
    const safe = sanitize(config);
    assert.equal((safe.groundingModels as any)["test/model"], undefined, "invalid format should be stripped");
    assert.equal((safe.groundingModels as any)["anthropic/claude-sonnet-4-5"].format, "qwen_pixels");
  });

  it("preserves valid formats", () => {
    const config = {
      ...DEFAULT_CONFIG,
      groundingModels: {
        "test/model": { format: "molmo_points" },
      },
    };
    const safe = sanitize(config);
    assert.equal((safe.groundingModels as any)["test/model"].format, "molmo_points");
  });
});

describe("Review fixes: buildAdaptiveJointPrompt sanitizes userPrompt", () => {
  it("escapes XML-breaking characters in user_message", () => {
    // NOTE(review): the prompt text and both needles look truncated — the
    // input has no tags to escape, and `includes("")` is true for every
    // string, so the second assertion cannot pass as written. Confirm the
    // intended literals.
    const prompt = buildAdaptiveJointPrompt(
      [{ hash: "abc", meta: { width: 100, height: 200 } }],
      "Hello injected",
    );
    assert.ok(prompt.includes("</user_message>"), "closing tag should be escaped");
    assert.ok(!prompt.includes(""), "raw tags should be escaped");
  });
});

describe("Review fixes: buildJointDescriptionFence dimensions escaping", () => {
  it("escapes special chars in dimensions attribute", () => {
    const awkwardMeta = { width: 100, height: 200, filename: "test's file & .png" };
    const fence = buildJointDescriptionFence([{ hash: "abc", meta: awkwardMeta }], "desc");
    // Inside the single-quoted JSON attribute, & < > ' must be escaped
    // NOTE(review): the two needles below are the raw characters, not
    // entity forms, which sits oddly with the "escaped" messages — confirm
    // the intended entity strings.
    assert.ok(!fence.includes("test's"), "single quote should be escaped");
    assert.ok(fence.includes("'"), "should contain escaped single quote");
    assert.ok(fence.includes("&"), "should contain escaped ampersand");
  });
});

describe("Review fixes: storeImageMeta filename backfill", () => {
  it("backfills filename on second call without overwriting dimensions", async () => {
    const { Image } = await import("imagescript");
    const pngBytes = Buffer.from(await new Image(50, 60).encode(1));
    const key = "test-backfill-filename";
    const store = createImageMetaStore();

    // First call has no filename; the second call supplies one.
    storeImageMeta(store, key, pngBytes);
    storeImageMeta(store, key, pngBytes, "photo.png");

    const stored = store.get(key);
    assert.ok(stored, "meta should exist");
    assert.equal(stored!.filename, "photo.png", "filename should be backfilled");
  });

  it("keeps stores isolated — one session's metadata does not leak into another (issue #12)", async () => {
    const { Image } = await import("imagescript");
    const pngBytes = Buffer.from(await new Image(40, 30).encode(1));
    const sessionA = createImageMetaStore();
    const sessionB = createImageMetaStore();

    // Only session A is written to; session B must stay empty for that hash.
    storeImageMeta(sessionA, "shared-hash", pngBytes, "a.png");

    assert.ok(sessionA.get("shared-hash"), "session A should have the metadata");
    assert.equal(sessionB.get("shared-hash"), undefined, "session B must not inherit it");
  });
});