/**
* Unit tests for vision-proxy pure helpers.
*
* Run:
* node --experimental-strip-types --test extensions/__tests__/internal.test.ts
*
* Requires Node 22+ for native TypeScript stripping. No build / no deps.
*/
import { strict as assert } from "node:assert";
import { after, describe, it } from "node:test";
import { lstat, mkdtemp, rm, symlink, writeFile } from "node:fs/promises";
import os from "node:os";
import { join, parse } from "node:path";
import {
buildConversationContext,
buildDescriptionFence,
buildAnalysisFence,
buildVideoDescriptionFence,
buildVideoEmptyResponseError,
buildVideoProxySection,
clampPixels,
extractXaiResponsesText,
formatXaiSttTranscript,
isTranscriptionRequest,
isXaiProvider,
CUSTOM_TYPE_CONFIG,
CUSTOM_TYPE_CONSENT,
CUSTOM_TYPE_DESCRIPTION,
cropSignature,
DEFAULT_CONFIG,
envFlags,
escapeAttr,
extractCandidateImagePaths,
extractCandidateVideoPaths,
extractCandidateAudioPaths,
extractDimensions,
fenceUntrusted,
findDescriptions,
fuzzyMatches,
getGroundingFormat,
hasConsent,
hashImageData,
IMAGE_PATH_PLACEHOLDER,
VIDEO_PATH_PLACEHOLDER,
isPathAllowed,
isValidNamedRegion,
LRUCache,
normalizedToPixels,
parseModelString,
pluralImages,
readEnvOverrides,
readImageFileWithReason,
readPersistentFile,
resolveConfig,
resolveCropEntry,
resolveRegion,
sanitize,
hammingDistance,
computePHash,
cropImage,
shutdownCropWorkers,
piAiImageToBuffer,
bufferToPiAiImage,
shouldStripImages,
splitSubcommand,
stripImagePaths,
stripMediaPaths,
toPiAiImage,
type VisionConfig,
writePersistentFile,
sanitizeForLog,
storeImageMeta,
createImageMetaStore,
storeImageData,
getImageData,
createImageDataStore,
parseRecallRef,
spinnerFrame,
formatProgressStatus,
SPINNER_FRAMES,
RECALL_HINT,
} from "../internal.ts";
// SessionEntry minimal shape — typed loose because peer dep types are not loaded in test
type Entry = any;
const customEntry = (customType: string, data: unknown): Entry => ({
type: "custom",
customType,
data,
});
const messageEntry = (role: "user" | "assistant", text: string): Entry => ({
type: "message",
message: { role, content: [{ type: "text", text }] },
});
describe("parseModelString", () => {
it("accepts valid provider/model pairs", () => {
assert.deepEqual(parseModelString("anthropic/claude-sonnet-4-5"), {
provider: "anthropic",
modelId: "claude-sonnet-4-5",
});
assert.deepEqual(parseModelString("openai/gpt-4o"), { provider: "openai", modelId: "gpt-4o" });
assert.deepEqual(parseModelString("provider/path/with/slashes"), {
provider: "provider",
modelId: "path/with/slashes",
});
});
it("normalizes legacy x-ai provider id to Pi's xai provider", () => {
assert.deepEqual(parseModelString("x-ai/grok-4.3"), {
provider: "xai",
modelId: "grok-4.3",
});
assert.deepEqual(parseModelString("xai/grok-4.3"), {
provider: "xai",
modelId: "grok-4.3",
});
});
it("rejects malformed strings", () => {
assert.equal(parseModelString(""), null);
assert.equal(parseModelString("/foo"), null);
assert.equal(parseModelString("foo/"), null);
assert.equal(parseModelString("noslash"), null);
assert.equal(parseModelString("provider with space/m"), null);
assert.equal(parseModelString("provider/has space"), null);
});
});
describe("sanitize", () => {
it("clobbers garbage to defaults", () => {
const out = sanitize({
mode: "weird" as any,
provider: "bad provider",
modelId: "bad model id",
systemPrompt: "",
includeContext: "yes" as any,
});
assert.equal(out.mode, DEFAULT_CONFIG.mode);
assert.equal(out.provider, DEFAULT_CONFIG.provider);
assert.equal(out.modelId, DEFAULT_CONFIG.modelId);
assert.equal(out.systemPrompt, DEFAULT_CONFIG.systemPrompt);
assert.equal(out.includeContext, DEFAULT_CONFIG.includeContext);
});
it("normalizes legacy x-ai provider id in config", () => {
const result = sanitize({ ...DEFAULT_CONFIG, provider: "x-ai", videoProvider: "x-ai" });
assert.equal(result.provider, "xai");
assert.equal(result.videoProvider, "xai");
});
it("preserves valid values", () => {
const cfg: VisionConfig = {
mode: "always",
provider: "openai",
modelId: "gpt-4o",
systemPrompt: "custom prompt",
includeContext: false,
tool: "on",
maxImagesPerCall: 5,
maxBatch: 2,
cacheSize: 100,
pHashSimilarityThreshold: 0.9,
groundingModels: {},
};
const result = sanitize(cfg);
assert.equal(result.mode, cfg.mode);
assert.equal(result.provider, cfg.provider);
assert.equal(result.modelId, cfg.modelId);
assert.equal(result.systemPrompt, cfg.systemPrompt);
assert.equal(result.includeContext, cfg.includeContext);
assert.equal(result.tool, cfg.tool);
assert.equal(result.maxImagesPerCall, cfg.maxImagesPerCall);
assert.equal(result.maxBatch, cfg.maxBatch);
assert.equal(result.cacheSize, cfg.cacheSize);
assert.equal(result.pHashSimilarityThreshold, cfg.pHashSimilarityThreshold);
});
});
describe("readEnvOverrides", () => {
it("returns empty when env unset", () => {
assert.deepEqual(readEnvOverrides({}), {});
});
it("reads valid mode", () => {
assert.deepEqual(readEnvOverrides({ PI_VISION_PROXY_MODE: "always" }), { mode: "always" });
assert.deepEqual(readEnvOverrides({ PI_VISION_PROXY_MODE: "off" }), { mode: "off" });
});
it("ignores invalid mode", () => {
assert.deepEqual(readEnvOverrides({ PI_VISION_PROXY_MODE: "bogus" }), {});
});
it("reads model string", () => {
const out = readEnvOverrides({ PI_VISION_PROXY_MODEL: "openai/gpt-4o" });
assert.equal(out.provider, "openai");
assert.equal(out.modelId, "gpt-4o");
});
it("normalizes legacy x-ai video model env override", () => {
const out = readEnvOverrides({ PI_VISION_PROXY_VIDEO_MODEL: "x-ai/grok-4.3" });
assert.equal(out.videoProvider, "xai");
assert.equal(out.videoModelId, "grok-4.3");
});
it("ignores malformed model string", () => {
assert.deepEqual(readEnvOverrides({ PI_VISION_PROXY_MODEL: "noslash" }), {});
});
it("parses includeContext truthy/falsy values", () => {
for (const v of ["1", "true", "yes", "on", "TRUE", "On"]) {
assert.equal(readEnvOverrides({ PI_VISION_PROXY_INCLUDE_CONTEXT: v }).includeContext, true, `truthy ${v}`);
}
for (const v of ["0", "false", "no", "off", "FALSE"]) {
assert.equal(readEnvOverrides({ PI_VISION_PROXY_INCLUDE_CONTEXT: v }).includeContext, false, `falsy ${v}`);
}
assert.equal(readEnvOverrides({ PI_VISION_PROXY_INCLUDE_CONTEXT: "garbage" }).includeContext, undefined);
});
});
describe("envFlags", () => {
it("reports presence per variable", () => {
assert.deepEqual(envFlags({}), { mode: false, model: false, context: false, tool: false, maxImagesPerCall: false, maxBatch: false, cacheSize: false, videoModel: false });
assert.deepEqual(
envFlags({
PI_VISION_PROXY_MODE: "x",
PI_VISION_PROXY_MODEL: "y",
PI_VISION_PROXY_INCLUDE_CONTEXT: "",
}),
{ mode: true, model: true, context: true, tool: false, maxImagesPerCall: false, maxBatch: false, cacheSize: false, videoModel: false },
);
});
});
describe("resolveConfig", () => {
it("returns defaults with no entries and empty env", () => {
const cfg = resolveConfig([], {});
assert.deepEqual(cfg, DEFAULT_CONFIG);
});
it("env wins over persisted", () => {
const entries: Entry[] = [customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" })];
const cfg = resolveConfig(entries, { PI_VISION_PROXY_MODE: "always" });
assert.equal(cfg.mode, "always");
});
it("uses last persisted entry", () => {
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" }),
customEntry(CUSTOM_TYPE_CONFIG, { mode: "always" }),
];
assert.equal(resolveConfig(entries, {}).mode, "always");
});
});
describe("fenceUntrusted", () => {
it("neutralizes opening tag", () => {
const out = fenceUntrusted("");
assert.notEqual(out, "");
assert.ok(out.includes(""), "ZWSP injected");
});
it("neutralizes closing tag, case-insensitive", () => {
const out = fenceUntrusted("");
assert.notEqual(out, "");
});
it("leaves unrelated text intact", () => {
assert.equal(fenceUntrusted("plain text "), "plain text ");
});
});
describe("hashImageData", () => {
it("is deterministic and 32 chars", () => {
const a = hashImageData("hello");
const b = hashImageData("hello");
assert.equal(a, b);
assert.equal(a.length, 32);
});
it("differs for different inputs", () => {
assert.notEqual(hashImageData("a"), hashImageData("b"));
});
});
describe("pluralImages", () => {
it("singular vs plural", () => {
assert.equal(pluralImages(1), "1 image");
assert.equal(pluralImages(0), "0 images");
assert.equal(pluralImages(5), "5 images");
});
});
describe("parseRecallRef", () => {
const hash = "a".repeat(32);
it("accepts a bare 32-hex hash", () => {
assert.equal(parseRecallRef(hash), hash);
});
it("accepts a sha256:-prefixed hash", () => {
assert.equal(parseRecallRef(`sha256:${hash}`), hash);
});
it("strips a #crop suffix and recalls the base image", () => {
assert.equal(parseRecallRef(`${hash}#crop:1840,120,840,360`), hash);
});
it("normalizes uppercase to lowercase", () => {
assert.equal(parseRecallRef("A".repeat(32)), hash);
});
it("rejects file paths and non-hash refs", () => {
assert.equal(parseRecallRef("/tmp/shot.png"), null);
assert.equal(parseRecallRef("./a.png"), null);
assert.equal(parseRecallRef("screenshot.png"), null);
// Wrong length / non-hex
assert.equal(parseRecallRef("a".repeat(31)), null);
assert.equal(parseRecallRef("a".repeat(33)), null);
assert.equal(parseRecallRef("z".repeat(32)), null);
});
});
describe("session image recall store", () => {
it("round-trips retained image bytes by hash", () => {
const store = createImageDataStore();
storeImageData(store, "hash1", "AAAA", "image/png");
const got = getImageData(store, "hash1");
assert.deepEqual(got, { data: "AAAA", mimeType: "image/png" });
assert.equal(getImageData(store, "missing"), undefined);
});
it("ignores empty hash or data", () => {
const store = createImageDataStore();
storeImageData(store, "", "AAAA", "image/png");
storeImageData(store, "hash", "", "image/png");
assert.equal(store.map.size, 0);
});
it("does not duplicate on re-store of the same hash", () => {
const store = createImageDataStore();
storeImageData(store, "hash1", "AAAA", "image/png");
storeImageData(store, "hash1", "AAAA", "image/png");
assert.equal(store.map.size, 1);
});
it("keeps stores isolated — one session's bytes do not leak into another (issue #12)", () => {
const sessionA = createImageDataStore();
const sessionB = createImageDataStore();
storeImageData(sessionA, "hash1", "AAAA", "image/png");
assert.deepEqual(getImageData(sessionA, "hash1"), { data: "AAAA", mimeType: "image/png" });
assert.equal(getImageData(sessionB, "hash1"), undefined);
});
it("evicts least-recently-used entries past the byte budget", () => {
const store = createImageDataStore();
const prev = process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
// Budget of 10 decoded bytes; each 8-char base64 entry decodes to 6 bytes.
process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = "10";
try {
storeImageData(store, "h1", "AAAAAAAA", "image/png"); // 6 decoded bytes, total 6
storeImageData(store, "h2", "BBBBBBBB", "image/png"); // 6 decoded bytes, total 12 > 10 → evict h1
assert.equal(getImageData(store, "h1"), undefined);
assert.deepEqual(getImageData(store, "h2"), { data: "BBBBBBBB", mimeType: "image/png" });
} finally {
if (prev === undefined) delete process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
else process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = prev;
}
});
it("keeps a single oversized image rather than evicting everything", () => {
const store = createImageDataStore();
const prev = process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = "4";
try {
storeImageData(store, "big", "AAAAAAAAAAAA", "image/png"); // 9 decoded bytes > 4 budget
assert.deepEqual(getImageData(store, "big"), { data: "AAAAAAAAAAAA", mimeType: "image/png" });
assert.equal(store.map.size, 1);
} finally {
if (prev === undefined) delete process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
else process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = prev;
}
});
it("bumps recency on access so the touched entry survives eviction", () => {
const store = createImageDataStore();
const prev = process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
// Budget of 14 decoded bytes; each 8-char base64 entry decodes to 6 bytes.
process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = "14";
try {
storeImageData(store, "h1", "AAAAAAAA", "image/png"); // 6 decoded bytes, total 6
storeImageData(store, "h2", "BBBBBBBB", "image/png"); // 6 decoded bytes, total 12
getImageData(store, "h1"); // bump h1 to most-recent
storeImageData(store, "h3", "CCCCCCCC", "image/png"); // 6 decoded bytes, total 18 > 14 → evict LRU (h2)
assert.deepEqual(getImageData(store, "h1"), { data: "AAAAAAAA", mimeType: "image/png" });
assert.equal(getImageData(store, "h2"), undefined);
assert.deepEqual(getImageData(store, "h3"), { data: "CCCCCCCC", mimeType: "image/png" });
} finally {
if (prev === undefined) delete process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES;
else process.env.PI_VISION_PROXY_IMAGE_RECALL_BYTES = prev;
}
});
});
describe("spinnerFrame", () => {
it("returns a frame from the set and wraps around", () => {
assert.equal(spinnerFrame(0), SPINNER_FRAMES[0]);
assert.equal(spinnerFrame(SPINNER_FRAMES.length), SPINNER_FRAMES[0]);
assert.equal(spinnerFrame(SPINNER_FRAMES.length + 1), SPINNER_FRAMES[1]);
assert.ok(SPINNER_FRAMES.includes(spinnerFrame(7)));
});
it("handles negative ticks without throwing", () => {
assert.ok(SPINNER_FRAMES.includes(spinnerFrame(-1)));
assert.ok(SPINNER_FRAMES.includes(spinnerFrame(-13)));
});
});
describe("formatProgressStatus", () => {
it("includes frame, label, and elapsed seconds", () => {
assert.equal(
formatProgressStatus("Analyzing image 2/4…", "⠙", 3),
"multimodal-proxy ⠙ Analyzing image 2/4… (3s)",
);
});
it("floors and clamps elapsed seconds", () => {
assert.match(formatProgressStatus("x", "⠋", 4.9), /\(4s\)$/);
assert.match(formatProgressStatus("x", "⠋", -2), /\(0s\)$/);
});
});
describe("RECALL_HINT", () => {
it("mentions analyze_image and the image id", () => {
assert.match(RECALL_HINT, /analyze_image/);
assert.match(RECALL_HINT, /image id/i);
});
});
describe("splitSubcommand", () => {
it("splits sub and value with arbitrary whitespace", () => {
assert.deepEqual(splitSubcommand("model anthropic/claude"), { sub: "model", value: "anthropic/claude" });
assert.deepEqual(splitSubcommand("model anthropic/claude "), {
sub: "model",
value: "anthropic/claude",
});
assert.deepEqual(splitSubcommand("CONSENT YES"), { sub: "consent", value: "YES" });
});
it("handles bare sub with no value", () => {
assert.deepEqual(splitSubcommand("consent"), { sub: "consent", value: "" });
});
it("handles empty input", () => {
assert.deepEqual(splitSubcommand(""), { sub: "", value: "" });
});
});
describe("buildConversationContext", () => {
it("returns empty for no message entries", () => {
assert.equal(buildConversationContext([]), "");
});
it("concatenates user and assistant text in order", () => {
const entries: Entry[] = [
messageEntry("user", "first"),
messageEntry("assistant", "reply"),
customEntry("other", {}),
];
const out = buildConversationContext(entries);
assert.equal(out, "User: first\nAssistant: reply");
});
it("keeps only the last 8 message entries", () => {
const entries: Entry[] = [];
for (let i = 0; i < 12; i++) entries.push(messageEntry("user", `m${i}`));
const out = buildConversationContext(entries);
const lines = out.split("\n");
assert.equal(lines.length, 8);
assert.equal(lines[0], "User: m4");
assert.equal(lines[7], "User: m11");
});
it("truncates assistant content to 500 chars", () => {
const long = "x".repeat(800);
const out = buildConversationContext([messageEntry("assistant", long)]);
assert.ok(out.startsWith("Assistant: "));
assert.equal(out.length, "Assistant: ".length + 500);
});
it("truncates total to last 3000 chars with ellipsis", () => {
const entries: Entry[] = [];
for (let i = 0; i < 8; i++) entries.push(messageEntry("user", "y".repeat(490)));
const out = buildConversationContext(entries);
assert.ok(out.length <= 3001);
assert.ok(out.startsWith("…"));
});
});
describe("findDescriptions", () => {
it("collects hash → description from custom entries", () => {
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_DESCRIPTION, { hash: "abc", description: "desc-a" }),
customEntry(CUSTOM_TYPE_DESCRIPTION, { hash: "def", description: "desc-b" }),
customEntry("other", {}),
customEntry(CUSTOM_TYPE_DESCRIPTION, { hash: "", description: "skip" }),
];
const map = findDescriptions(entries);
assert.equal(map.size, 2);
assert.equal(map.get("abc"), "desc-a");
assert.equal(map.get("def"), "desc-b");
});
});
describe("hasConsent", () => {
it("returns false with no entries", () => {
assert.equal(hasConsent([]), false);
});
it("uses the most recent consent entry", () => {
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
customEntry(CUSTOM_TYPE_CONSENT, { granted: false }),
];
assert.equal(hasConsent(entries), false);
const granted: Entry[] = [
customEntry(CUSTOM_TYPE_CONSENT, { granted: false }),
customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
];
assert.equal(hasConsent(granted), true);
});
it("supports per-provider consent", () => {
// Consent for anthropic should not carry over to openai
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "anthropic" }),
];
assert.equal(hasConsent(entries, "anthropic"), true);
assert.equal(hasConsent(entries, "openai"), false);
// Without provider arg, any granted consent matches
assert.equal(hasConsent(entries), true);
});
it("global consent (no provider) does NOT satisfy per-provider check", () => {
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
];
// Global consent is valid when no specific provider is requested
assert.equal(hasConsent(entries), true);
// But it does NOT satisfy a per-provider consent check
assert.equal(hasConsent(entries, "anthropic"), false);
assert.equal(hasConsent(entries, "openai"), false);
});
});
describe("toPiAiImage", () => {
it("passes through new shape", () => {
const img = { type: "image", data: "AAAA", mimeType: "image/png" } as any;
assert.deepEqual(toPiAiImage(img), { type: "image", data: "AAAA", mimeType: "image/png" });
});
it("converts legacy { source: { data, mediaType } } shape", () => {
const legacy = { source: { data: "BBBB", mediaType: "image/jpeg" } };
assert.deepEqual(toPiAiImage(legacy), { type: "image", data: "BBBB", mimeType: "image/jpeg" });
});
it("throws on unsupported shape", () => {
assert.throws(() => toPiAiImage({} as any), /Unsupported image content shape/);
});
});
describe("shouldStripImages", () => {
const cfg = (mode: VisionConfig["mode"]): VisionConfig => ({ ...DEFAULT_CONFIG, mode });
it("off → never strip", () => {
assert.equal(shouldStripImages(cfg("off"), undefined), false);
assert.equal(shouldStripImages(cfg("off"), ["image", "text"]), false);
});
it("always → always strip", () => {
assert.equal(shouldStripImages(cfg("always"), undefined), true);
assert.equal(shouldStripImages(cfg("always"), ["image"]), true);
});
it("fallback → strip only when model lacks image input", () => {
assert.equal(shouldStripImages(cfg("fallback"), ["text"]), true);
assert.equal(shouldStripImages(cfg("fallback"), undefined), true);
assert.equal(shouldStripImages(cfg("fallback"), ["text", "image"]), false);
});
});
describe("extractCandidateImagePaths", () => {
it("detects pi-clipboard temp files (Windows)", () => {
const text = "What is this? C:\\Users\\Alessandro\\AppData\\Local\\Temp\\pi-clipboard-57a452d3-a1b2-c3d4-e5f6-789012345678.png";
const paths = extractCandidateImagePaths(text);
assert.equal(paths.length, 1);
assert.ok(paths[0].includes("pi-clipboard-"));
assert.ok(paths[0].endsWith(".png"));
});
it("detects pi-clipboard temp files (Unix)", () => {
const text = "/tmp/pi-clipboard-abc123-def456.png";
const paths = extractCandidateImagePaths(text);
assert.equal(paths.length, 1);
assert.ok(paths[0].includes("pi-clipboard-"));
});
it("detects general image paths with common extensions", () => {
const cases = [
{ input: "see ./screenshot.jpg", ext: ".jpg" },
{ input: "look at /home/user/photo.jpeg", ext: ".jpeg" },
{ input: "check /tmp/diagram.gif", ext: ".gif" },
{ input: "view C:\\logs\\capture.webp", ext: ".webp" },
{ input: "show ~/pic.bmp", ext: ".bmp" },
{ input: "open ./scan.tiff", ext: ".tiff" },
{ input: "see ./icon.ico", ext: ".ico" },
{ input: "view ./photo.avif", ext: ".avif" },
];
for (const { input, ext } of cases) {
const paths = extractCandidateImagePaths(input);
assert.equal(paths.length, 1, `should detect ${ext} in: ${input}`);
assert.ok(paths[0].endsWith(ext), `path should end with ${ext}`);
}
});
it("deduplicates identical paths", () => {
const text = "see ./img.png and ./img.png again";
const paths = extractCandidateImagePaths(text);
assert.equal(paths.length, 1);
});
it("returns empty for text without image paths", () => {
assert.deepEqual(extractCandidateImagePaths("hello world"), []);
assert.deepEqual(extractCandidateImagePaths(""), []);
assert.deepEqual(extractCandidateImagePaths("no images here.txt"), []);
});
it("does not match URLs", () => {
const paths = extractCandidateImagePaths("see https://example.com/photo.png for details");
assert.equal(paths.length, 0);
});
it("does not match bare filenames (HTML/Markdown)", () => {
assert.deepEqual(extractCandidateImagePaths('
'), []);
assert.deepEqual(extractCandidateImagePaths(''), []);
assert.deepEqual(extractCandidateImagePaths('photo.png'), []);
});
it("does not match file:// URLs as bare paths", () => {
// file:///tmp/x.png — leading "file:" not in allow-list; only the inner /tmp portion
// matters, but the colon prevents the anchor from matching cleanly. Should not double-emit.
const paths = extractCandidateImagePaths("see file:///tmp/x.png");
assert.ok(paths.every((p) => !p.startsWith("file:")));
});
});
describe("extractCandidateMediaPaths", () => {
it("detects quoted Windows video paths with spaces", () => {
const input = `video is not working: "D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4" transcribe this`;
assert.deepEqual(extractCandidateVideoPaths(input), [
"D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4",
]);
});
it("detects unquoted video paths without spaces", () => {
assert.deepEqual(extractCandidateVideoPaths("see D:\\Downloads\\clip.mp4 now"), ["D:\\Downloads\\clip.mp4"]);
assert.deepEqual(extractCandidateVideoPaths("see ./clip.mkv now"), ["./clip.mkv"]);
});
it("detects unquoted Windows video paths with spaces", () => {
const input = "Transcribe this video with timestamps D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4";
assert.deepEqual(extractCandidateVideoPaths(input), [
"D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4",
]);
});
it("detects quoted relative paths and audio paths with spaces", () => {
assert.deepEqual(extractCandidateVideoPaths(`see "./my video.mp4" now`), ["./my video.mp4"]);
assert.deepEqual(extractCandidateAudioPaths(`listen "C:\\Users\\Me\\Audio File.m4a" please`), [
"C:\\Users\\Me\\Audio File.m4a",
]);
});
it("does not treat unquoted paths with spaces as a single path", () => {
assert.deepEqual(extractCandidateVideoPaths("see ./my video.mp4 now"), []);
});
});
describe("stripMediaPaths", () => {
it("replaces media paths with placeholder", () => {
const mediaPath = "D:\\Downloads\\Rethinking Agents - Harness is All you Need_.mp4";
const result = stripMediaPaths(`transcribe "${mediaPath}" please`, [mediaPath]);
assert.equal(result, `transcribe "${VIDEO_PATH_PLACEHOLDER}" please`);
});
});
describe("stripImagePaths", () => {
it("replaces a single path with placeholder", () => {
const result = stripImagePaths("see /tmp/pi-clipboard-abc.png here", ["/tmp/pi-clipboard-abc.png"]);
assert.equal(result, `see ${IMAGE_PATH_PLACEHOLDER} here`);
});
it("replaces multiple paths", () => {
const result = stripImagePaths(
"/tmp/a.png and /tmp/b.jpg",
["/tmp/a.png", "/tmp/b.jpg"],
);
assert.ok(!result.includes("/tmp/a.png"));
assert.ok(!result.includes("/tmp/b.jpg"));
assert.equal(result.match(/\[image file/g)?.length, 2);
});
it("handles empty paths array", () => {
const text = "unchanged text";
assert.equal(stripImagePaths(text, []), text);
});
it("handles longer paths first to avoid partial replacements", () => {
const result = stripImagePaths(
"/tmp/img.png /tmp/img.png.bak",
["/tmp/img.png.bak", "/tmp/img.png"],
);
assert.ok(!result.includes("/tmp/img.png"));
});
});
// 1×1 transparent PNG
const TINY_PNG = Buffer.from(
"89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4890000000d4944415478da6300010000000500010d0a2db40000000049454e44ae426082",
"hex",
);
describe("isPathAllowed", () => {
it("allows files inside tmpdir", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
const file = join(dir, "x.png");
await writeFile(file, TINY_PNG);
try {
assert.equal(await isPathAllowed(file), true);
} finally {
await rm(dir, { recursive: true, force: true });
}
});
it("allows non-existent files whose parent directory is in the allow-list", async () => {
// Non-existent files in tmpdir pass the check so callers can return "unreadable"
// instead of the misleading "denied" / "path outside allowed directories" message.
assert.equal(await isPathAllowed(join(os.tmpdir(), "does-not-exist-xyz.png")), true);
});
it("denies non-existent files outside the allow-list", async () => {
assert.equal(await isPathAllowed("/etc/does-not-exist-vp-xyz.png"), false);
});
it("allows files inside /tmp (system-wide Unix temp dir)", async () => {
if (os.platform() === "win32") return;
const file = join("/tmp", `vp-test-direct-${Date.now()}.png`);
await writeFile(file, TINY_PNG);
try {
assert.equal(await isPathAllowed(file), true);
} finally {
try { await rm(file); } catch { /* ignore */ }
}
});
it("allows non-existent files inside /tmp", async () => {
if (os.platform() === "win32") return;
assert.equal(await isPathAllowed("/tmp/does-not-exist-vp-xyz.png"), true);
});
it("allows local Windows drive paths by default", async () => {
const root = parse(process.cwd()).root;
if (!/^[a-z]:[\\/]/i.test(root)) return;
const prevDrives = process.env.PI_VISION_PROXY_ALLOW_DRIVES;
try {
delete process.env.PI_VISION_PROXY_ALLOW_DRIVES;
assert.equal(await isPathAllowed(process.cwd()), true);
} finally {
if (prevDrives === undefined) delete process.env.PI_VISION_PROXY_ALLOW_DRIVES;
else process.env.PI_VISION_PROXY_ALLOW_DRIVES = prevDrives;
}
});
it("can disable local Windows drive path access with PI_VISION_PROXY_ALLOW_DRIVES=0", async () => {
const root = parse(process.cwd()).root;
if (!/^[a-z]:[\\/]/i.test(root)) return;
const prevDrives = process.env.PI_VISION_PROXY_ALLOW_DRIVES;
try {
process.env.PI_VISION_PROXY_ALLOW_DRIVES = "0";
// cwd is still allowed by the cwd rule, so assert using the user home when it is outside cwd.
const home = os.homedir();
if (!home.toLowerCase().startsWith(process.cwd().toLowerCase())) {
assert.equal(await isPathAllowed(home), false);
}
} finally {
if (prevDrives === undefined) delete process.env.PI_VISION_PROXY_ALLOW_DRIVES;
else process.env.PI_VISION_PROXY_ALLOW_DRIVES = prevDrives;
}
});
it("allows homedir files when PI_VISION_PROXY_ALLOW_HOME=1", async () => {
const home = os.homedir();
const prevHome = process.env.PI_VISION_PROXY_ALLOW_HOME;
const prevDrives = process.env.PI_VISION_PROXY_ALLOW_DRIVES;
try {
process.env.PI_VISION_PROXY_ALLOW_DRIVES = "0";
delete process.env.PI_VISION_PROXY_ALLOW_HOME;
if (!home.toLowerCase().startsWith(process.cwd().toLowerCase())) {
assert.equal(await isPathAllowed(home), false);
}
process.env.PI_VISION_PROXY_ALLOW_HOME = "1";
assert.equal(await isPathAllowed(home), true);
} finally {
if (prevHome === undefined) delete process.env.PI_VISION_PROXY_ALLOW_HOME;
else process.env.PI_VISION_PROXY_ALLOW_HOME = prevHome;
if (prevDrives === undefined) delete process.env.PI_VISION_PROXY_ALLOW_DRIVES;
else process.env.PI_VISION_PROXY_ALLOW_DRIVES = prevDrives;
}
});
});
describe("readImageFileWithReason", () => {
it("reads valid PNG inside tmpdir", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
const file = join(dir, "ok.png");
await writeFile(file, TINY_PNG);
try {
const r = await readImageFileWithReason(file);
assert.ok(r.image, "image should be returned");
assert.equal(r.image?.mimeType, "image/png");
assert.equal(r.image?.type, "image");
assert.ok((r.image?.data ?? "").length > 0);
} finally {
await rm(dir, { recursive: true, force: true });
}
});
it("returns reason=not-an-image for unsupported extensions", async () => {
const r = await readImageFileWithReason("/tmp/foo.txt");
assert.equal(r.image, null);
assert.equal(r.reason, "not-an-image");
});
it("returns reason=denied for path outside allow-list", async () => {
const r = await readImageFileWithReason("/etc/never-exists-vp.png");
assert.equal(r.image, null);
assert.equal(r.reason, "denied");
});
it("returns reason=unreadable for non-existent file inside /tmp", async () => {
if (os.platform() === "win32") return;
// Previously returned "denied" (misleading); now returns "unreadable" so the user
// knows the file is simply missing, not that /tmp itself is forbidden.
const r = await readImageFileWithReason("/tmp/does-not-exist-vp-xyz.png");
assert.equal(r.image, null);
assert.equal(r.reason, "unreadable");
});
it("returns reason=empty for zero-byte image", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
const file = join(dir, "empty.png");
await writeFile(file, "");
try {
const r = await readImageFileWithReason(file);
assert.equal(r.image, null);
assert.equal(r.reason, "empty");
} finally {
await rm(dir, { recursive: true, force: true });
}
});
it("returns reason=too-large when above PI_VISION_PROXY_MAX_IMAGE_BYTES", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
const file = join(dir, "big.png");
await writeFile(file, Buffer.alloc(64));
const prev = process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES;
process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES = "32";
try {
const r = await readImageFileWithReason(file);
assert.equal(r.image, null);
assert.equal(r.reason, "too-large");
assert.equal(r.bytes, 64);
} finally {
if (prev === undefined) delete process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES;
else process.env.PI_VISION_PROXY_MAX_IMAGE_BYTES = prev;
await rm(dir, { recursive: true, force: true });
}
});
it("denies symlink resolving outside allow-list", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
const target = "/etc/never-exists-vp-target.png";
const link = join(dir, "link.png");
try {
try {
await symlink(target, link);
} catch {
return; // platform doesn't support symlinks (e.g., Windows w/o admin) → skip
}
const r = await readImageFileWithReason(link);
assert.equal(r.image, null);
assert.equal(r.reason, "denied");
} finally {
await rm(dir, { recursive: true, force: true });
}
});
it("denies symlink to existing file outside allow-list (TOCTOU post-read check)", async () => {
// Simulate the TOCTOU race: the symlink target exists and is readable, so
// readFile() succeeds, but the post-read realpath re-verification must catch that
// the resolved path is outside the allow-list and return "denied".
if (os.platform() === "win32") return;
// Find a readable file outside the allow-list to use as a target.
// /etc/hostname is present on most Unix systems; use it if it exists.
const target = "/etc/hostname";
let targetExists = false;
try { await lstat(target); targetExists = true; } catch { /* skip if absent */ }
if (!targetExists) return;
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
const link = join(dir, "link.png");
try {
try {
await symlink(target, link);
} catch {
return; // no symlink support → skip
}
const r = await readImageFileWithReason(link);
assert.equal(r.image, null);
assert.equal(r.reason, "denied");
} finally {
await rm(dir, { recursive: true, force: true });
}
});
});
describe("readPersistentFile / writePersistentFile", () => {
it("round-trips config through a file", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
try {
const cfg: Partial = { mode: "always", provider: "openai", modelId: "gpt-4o" };
await writePersistentFile(cfg, dir);
const read = await readPersistentFile(dir);
assert.equal(read.mode, "always");
assert.equal(read.provider, "openai");
assert.equal(read.modelId, "gpt-4o");
} finally {
await rm(dir, { recursive: true, force: true });
}
});
it("returns empty when file does not exist", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
try {
const read = await readPersistentFile(dir);
assert.deepEqual(read, {});
} finally {
await rm(dir, { recursive: true, force: true });
}
});
it("returns empty for invalid JSON", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
try {
await writeFile(join(dir, "vision-proxy.json"), "not json");
const read = await readPersistentFile(dir);
assert.deepEqual(read, {});
} finally {
await rm(dir, { recursive: true, force: true });
}
});
});
describe("resolveConfig with fileConfig", () => {
it("layers fileConfig between defaults and session entries", () => {
const entries: Entry[] = [];
const fileConfig: Partial = { mode: "always", provider: "openai", modelId: "gpt-4o" };
const cfg = resolveConfig(entries, {}, fileConfig);
assert.equal(cfg.mode, "always");
assert.equal(cfg.provider, "openai");
assert.equal(cfg.modelId, "gpt-4o");
});
it("session entries override fileConfig", () => {
const entries: Entry[] = [customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" })];
const fileConfig: Partial = { mode: "always" };
const cfg = resolveConfig(entries, {}, fileConfig);
assert.equal(cfg.mode, "off");
});
it("env overrides both file and session entries", () => {
const entries: Entry[] = [customEntry(CUSTOM_TYPE_CONFIG, { mode: "off" })];
const fileConfig: Partial = { mode: "always" };
const cfg = resolveConfig(entries, { PI_VISION_PROXY_MODE: "fallback" }, fileConfig);
assert.equal(cfg.mode, "fallback");
});
it("defaults fill in missing fileConfig fields", () => {
const fileConfig: Partial = { mode: "off" };
const cfg = resolveConfig([], {}, fileConfig);
assert.equal(cfg.mode, "off");
assert.equal(cfg.provider, DEFAULT_CONFIG.provider);
assert.equal(cfg.modelId, DEFAULT_CONFIG.modelId);
assert.equal(cfg.systemPrompt, DEFAULT_CONFIG.systemPrompt);
assert.equal(cfg.includeContext, DEFAULT_CONFIG.includeContext);
});
});
describe("fuzzyMatches", () => {
it("matches when all query chars appear in order", () => {
assert.equal(fuzzyMatches("Claude Sonnet 4.5", "cs4"), true);
assert.equal(fuzzyMatches("Claude Opus 4.6", "op46"), true);
assert.equal(fuzzyMatches("GPT-5.4 Pro", "g54"), true);
});
it("is case-insensitive", () => {
assert.equal(fuzzyMatches("Claude Sonnet", "CLAUDE"), true);
assert.equal(fuzzyMatches("gpt-4o", "GPT4O"), true);
});
it("rejects when chars are out of order or missing", () => {
assert.equal(fuzzyMatches("Claude Sonnet 4.5", "4cs"), false);
assert.equal(fuzzyMatches("GPT-5", "xyz"), false);
assert.equal(fuzzyMatches("Gemini", "gpt"), false);
});
it("matches empty query against anything", () => {
assert.equal(fuzzyMatches("anything", ""), true);
});
it("matches exact string", () => {
assert.equal(fuzzyMatches("Claude Sonnet 4.5", "Claude Sonnet 4.5"), true);
});
it("matches partial name", () => {
assert.equal(fuzzyMatches("Claude Opus 4.6 (EU)", "opus eu"), true);
assert.equal(fuzzyMatches("Nova Premier", "nova"), true);
});
});
// ── 1.4.0 tests ──────────────────────────────────────────────────────────
describe("isValidNamedRegion", () => {
it("accepts valid region names", () => {
for (const r of ["top-left", "bottom-right", "center", "top-half", "right"]) {
assert.equal(isValidNamedRegion(r), true, r);
}
});
it("rejects invalid names", () => {
assert.equal(isValidNamedRegion("middle"), false);
assert.equal(isValidNamedRegion(""), false);
assert.equal(isValidNamedRegion("TOP-LEFT"), false); // case-sensitive
});
});
describe("resolveRegion", () => {
it("returns normalized rectangle for each region", () => {
const tl = resolveRegion("top-left");
assert.deepEqual(tl, { x: 0, y: 0, width: 0.5, height: 0.5 });
const br = resolveRegion("bottom-right");
assert.deepEqual(br, { x: 0.5, y: 0.5, width: 0.5, height: 0.5 });
const center = resolveRegion("center");
assert.deepEqual(center, { x: 0.25, y: 0.25, width: 0.5, height: 0.5 });
});
it("top-half aliases top", () => {
assert.deepEqual(resolveRegion("top-half"), resolveRegion("top"));
});
});
describe("normalizedToPixels", () => {
it("converts normalized coordinates to pixels", () => {
const result = normalizedToPixels({ x: 0.5, y: 0.5, width: 0.5, height: 0.5 }, 1000, 1000);
assert.ok(result);
assert.equal(result!.x, 500);
assert.equal(result!.y, 500);
assert.equal(result!.width, 500);
assert.equal(result!.height, 500);
});
it("clamps to image bounds", () => {
// x=-0.5 clamped to 0, x+width=(-0.5+0.3)*100=-20 clamped to 0 → zero area → null
const result = normalizedToPixels({ x: -0.5, y: 0.9, width: 0.3, height: 0.3 }, 100, 100);
assert.equal(result, null, "negative x with small width should be null after clamp");
// A valid clamped case
const result2 = normalizedToPixels({ x: -0.1, y: 0.5, width: 0.8, height: 0.6 }, 100, 100);
assert.ok(result2);
assert.equal(result2!.x, 0);
assert.equal(result2!.y, 50);
});
it("returns null for zero-area crop", () => {
// Edge case: both x and x+width clamp to same value
const result = normalizedToPixels({ x: 1.0, y: 0, width: 0, height: 0.5 }, 100, 100);
assert.equal(result, null);
});
});
describe("clampPixels", () => {
it("clamps pixel coordinates to image bounds", () => {
const result = clampPixels({ x: -10, y: 50, width: 200, height: 100 }, 100, 200);
assert.ok(result);
assert.equal(result!.x, 0);
assert.equal(result!.y, 50);
assert.equal(result!.width, 100);
assert.equal(result!.height, 100);
});
it("returns null for zero-area after clamping", () => {
const result = clampPixels({ x: 200, y: 200, width: 10, height: 10 }, 100, 100);
assert.equal(result, null);
});
it("handles valid crop within bounds", () => {
const result = clampPixels({ x: 10, y: 20, width: 30, height: 40 }, 100, 100);
assert.ok(result);
assert.deepEqual(result, { x: 10, y: 20, width: 30, height: 40 });
});
});
describe("resolveCropEntry", () => {
it("resolves region crop", () => {
const result = resolveCropEntry({ image_index: 0, region: "top-left" }, 1000, 1000);
assert.equal(result.x, 0);
assert.equal(result.y, 0);
assert.equal(result.width, 500);
assert.equal(result.height, 500);
});
it("resolves normalized crop", () => {
const result = resolveCropEntry(
{ image_index: 0, normalized: { x: 0.25, y: 0.25, width: 0.5, height: 0.5 } },
1000, 1000,
);
assert.equal(result.x, 250);
assert.equal(result.y, 250);
assert.equal(result.width, 500);
assert.equal(result.height, 500);
});
it("resolves pixel crop", () => {
const result = resolveCropEntry(
{ image_index: 0, pixels: { x: 100, y: 200, width: 300, height: 400 } },
1000, 1000,
);
assert.deepEqual(result, { x: 100, y: 200, width: 300, height: 400 });
});
it("clamps pixel crop to image bounds", () => {
const result = resolveCropEntry(
{ image_index: 0, pixels: { x: 900, y: 900, width: 200, height: 200 } },
1000, 1000,
);
assert.equal(result.width, 100);
assert.equal(result.height, 100);
});
it("throws for zero-area normalized crop", () => {
assert.throws(
() => resolveCropEntry({ image_index: 0, normalized: { x: 1.0, y: 1.0, width: 0, height: 0 } }, 100, 100),
/zero area/,
);
});
it("throws for zero-area pixel crop", () => {
assert.throws(
() => resolveCropEntry({ image_index: 0, pixels: { x: 200, y: 200, width: 10, height: 10 } }, 100, 100),
/zero area/,
);
});
});
describe("cropSignature", () => {
it("formats x,y,width,height", () => {
assert.equal(cropSignature({ x: 10, y: 20, width: 30, height: 40 }), "10,20,30,40");
});
});
describe("LRUCache", () => {
it("stores and retrieves values", () => {
const cache = new LRUCache(3);
cache.set("a", 1);
assert.equal(cache.get("a"), 1);
});
it("evicts oldest when over capacity", () => {
const cache = new LRUCache(2);
cache.set("a", 1);
cache.set("b", 2);
cache.set("c", 3); // evicts "a"
assert.equal(cache.get("a"), undefined);
assert.equal(cache.get("b"), 2);
assert.equal(cache.get("c"), 3);
});
it("renews entry on get", () => {
const cache = new LRUCache(2);
cache.set("a", 1);
cache.set("b", 2);
cache.get("a"); // "a" is now most recent
cache.set("c", 3); // evicts "b" instead of "a"
assert.equal(cache.get("a"), 1);
assert.equal(cache.get("b"), undefined);
});
it("reports size", () => {
const cache = new LRUCache(10);
assert.equal(cache.size, 0);
cache.set("x", 1);
assert.equal(cache.size, 1);
});
it("clear removes all entries", () => {
const cache = new LRUCache(10);
cache.set("a", 1);
cache.clear();
assert.equal(cache.size, 0);
assert.equal(cache.get("a"), undefined);
});
it("resize shrinks the cache and evicts excess", () => {
const cache = new LRUCache(5);
for (let i = 0; i < 5; i++) cache.set(`k${i}`, i);
assert.equal(cache.size, 5);
cache.resize(2);
assert.equal(cache.size, 2);
assert.equal(cache.maxSize, 2);
// Oldest entries should be evicted
assert.equal(cache.get("k0"), undefined);
assert.equal(cache.get("k1"), undefined);
assert.equal(cache.get("k2"), undefined);
// Newest should survive
assert.equal(cache.get("k3"), 3);
assert.equal(cache.get("k4"), 4);
});
it("resize to larger does not lose entries", () => {
const cache = new LRUCache(3);
cache.set("a", 1);
cache.set("b", 2);
cache.resize(10);
assert.equal(cache.size, 2);
assert.equal(cache.get("a"), 1);
assert.equal(cache.get("b"), 2);
});
});
describe("extractDimensions", () => {
it("extracts dimensions from a PNG buffer", () => {
// TINY_PNG is 1×1
const dims = extractDimensions(TINY_PNG);
assert.ok(dims, "should return dimensions for valid PNG");
assert.equal(dims!.width, 1);
assert.equal(dims!.height, 1);
});
it("returns undefined for invalid data", () => {
const dims = extractDimensions(Buffer.from("not an image"));
assert.equal(dims, undefined);
});
});
describe("buildVideoDescriptionFence", () => {
it("builds video fence with file, hash, and mime attributes", () => {
const fence = buildVideoDescriptionFence("abc123", "clip.mp4", "video/mp4", "Hello world");
assert.ok(fence.includes('file="clip.mp4"'));
assert.ok(fence.includes('hash="abc123"'));
assert.ok(fence.includes('mime="video/mp4"'));
assert.ok(fence.includes("Hello world"));
});
});
describe("buildVideoEmptyResponseError", () => {
it("returns actionable guidance for providers that accept video but return no text", () => {
const message = buildVideoEmptyResponseError("xai", "grok-4.3");
assert.ok(message.includes("empty response from xai/grok-4.3"));
assert.ok(message.includes("provider accepted the request but returned no text"));
assert.ok(message.includes("native xAI STT or Files/Responses path"));
assert.ok(message.includes("shorter clip"));
assert.ok(message.includes("smaller/transcoded video"));
assert.ok(message.includes("Gemini"));
});
});
describe("xAI native media helpers", () => {
it("detects transcription requests", () => {
assert.equal(isTranscriptionRequest("Transcribe this video with timestamps"), true);
assert.equal(isTranscriptionRequest("make SRT captions"), true);
assert.equal(isTranscriptionRequest("summarize visual scenes"), false);
});
it("detects xAI provider aliases", () => {
assert.equal(isXaiProvider("xai"), true);
assert.equal(isXaiProvider("x-ai"), true);
assert.equal(isXaiProvider("google"), false);
});
it("formats xAI STT words into timestamped transcript", () => {
const formatted = formatXaiSttTranscript({
text: "Hello world.",
language: "English",
duration: 1.5,
words: [
{ text: "Hello", start: 0.1, end: 0.5 },
{ text: "world.", start: 0.6, end: 1.2 },
],
}, "clip.mp4");
assert.ok(formatted.includes("xAI Speech-to-Text transcription for clip.mp4"));
assert.ok(formatted.includes("Detected language: English"));
assert.ok(formatted.includes("[00:00"));
assert.ok(formatted.includes("Hello world."));
});
it("extracts output text from xAI Responses objects", () => {
assert.equal(extractXaiResponsesText({ output_text: "direct" }), "direct");
assert.equal(extractXaiResponsesText({ output: [{ type: "message", content: [{ type: "output_text", text: "nested" }] }] }), "nested");
});
});
describe("buildVideoProxySection", () => {
it("instructs downstream agents to use injected video analysis instead of local transcription tools", () => {
const section = buildVideoProxySection(
1,
"x-ai",
"grok-4.3",
buildVideoDescriptionFence("abc123", "clip.mp4", "video/mp4", "[00:00] hello"),
);
assert.ok(section.includes("already analyzed the media"));
assert.ok(section.includes("answer from this injected context"));
assert.ok(section.includes("Do not run local media-processing or transcription tools"));
assert.ok(section.includes("ffmpeg"));
assert.ok(section.includes("Whisper"));
assert.ok(section.includes("Python"));
assert.ok(section.includes("Only use external/local tools"));
assert.ok(section.includes(" {
it("builds fence with metadata attributes", () => {
const fence = buildDescriptionFence("abc123", "A screenshot", { width: 1920, height: 1080, filename: "screen.png" });
assert.ok(fence.startsWith(""));
});
it("includes crop_origin when cropped", () => {
const fence = buildDescriptionFence("abc123", "Detail", { width: 3840, height: 2160 }, { x: 1840, y: 120, width: 840, height: 360 });
assert.ok(fence.includes('#crop:1840,120,840,360'));
assert.ok(fence.includes('crop_origin="1840,120"'));
assert.ok(fence.includes('width="840"'));
assert.ok(fence.includes('height="360"'));
});
});
describe("buildAnalysisFence", () => {
it("builds fence with grounding_format", () => {
const fence = buildAnalysisFence("abc", "Analysis", { width: 100, height: 100 }, undefined, "qwen_pixels");
assert.ok(fence.includes('grounding_format="qwen_pixels"'));
});
it("omits grounding_format when undefined", () => {
const fence = buildAnalysisFence("abc", "Analysis", { width: 100, height: 100 });
assert.ok(!fence.includes("grounding_format"));
});
});
describe("fenceUntrusted (all three tags)", () => {
it("neutralizes vision_proxy_analysis tags", () => {
const out = fenceUntrusted('content');
assert.ok(!out.includes(""));
assert.ok(!out.includes(""));
});
it("neutralizes vision_proxy_joint_description tags", () => {
const out = fenceUntrusted('content');
assert.ok(!out.includes(""));
});
it("neutralizes vision_proxy_description tags (unchanged)", () => {
const out = fenceUntrusted('content');
assert.ok(!out.includes(""));
});
it("neutralizes both < and > in tags", () => {
const out = fenceUntrusted('test');
// Neither raw < nor raw > should appear in the tag parts
const tagMatch = out.match(/vision_proxy_description/g);
assert.ok(tagMatch);
// The opening bracket of each tag should be neutralized
assert.ok(!out.includes(" {
const out = fenceUntrusted('');
assert.ok(!out.includes(""), "closing tag with space should be neutralized");
});
it("neutralizes tags with attributes", () => {
const out = fenceUntrusted('');
assert.ok(!out.includes(" {
it("escapes double quotes", () => {
assert.equal(escapeAttr('file"name.png'), "file"name.png");
});
it("escapes angle brackets", () => {
assert.equal(escapeAttr("ac"), "a<b>c");
});
it("escapes ampersands", () => {
assert.equal(escapeAttr("a&b"), "a&b");
});
it("leaves safe characters intact", () => {
assert.equal(escapeAttr("photo.png"), "photo.png");
});
it("handles empty string", () => {
assert.equal(escapeAttr(""), "");
});
});
describe("getGroundingFormat", () => {
it("returns format for known model", () => {
const fmt = getGroundingFormat(DEFAULT_CONFIG, "Qwen", "Qwen2.5-VL-7B-Instruct");
assert.equal(fmt, "qwen_pixels");
});
it("returns 'none' for unknown model", () => {
const fmt = getGroundingFormat(DEFAULT_CONFIG, "anthropic", "claude-sonnet-4-5");
assert.equal(fmt, "none");
});
});
describe("readEnvOverrides (1.4.0 fields)", () => {
it("reads PI_VISION_PROXY_TOOL", () => {
assert.equal(readEnvOverrides({ PI_VISION_PROXY_TOOL: "on" }).tool, "on");
assert.equal(readEnvOverrides({ PI_VISION_PROXY_TOOL: "off" }).tool, "off");
assert.equal(readEnvOverrides({ PI_VISION_PROXY_TOOL: "bogus" }).tool, undefined);
});
it("reads PI_VISION_PROXY_MAX_IMAGES_PER_CALL", () => {
assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_IMAGES_PER_CALL: "5" }).maxImagesPerCall, 5);
assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_IMAGES_PER_CALL: "0" }).maxImagesPerCall, undefined);
assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_IMAGES_PER_CALL: "21" }).maxImagesPerCall, undefined);
});
it("reads PI_VISION_PROXY_MAX_BATCH", () => {
assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_BATCH: "3" }).maxBatch, 3);
assert.equal(readEnvOverrides({ PI_VISION_PROXY_MAX_BATCH: "0" }).maxBatch, undefined);
});
it("reads PI_VISION_PROXY_CACHE_SIZE", () => {
assert.equal(readEnvOverrides({ PI_VISION_PROXY_CACHE_SIZE: "100" }).cacheSize, 100);
assert.equal(readEnvOverrides({ PI_VISION_PROXY_CACHE_SIZE: "501" }).cacheSize, undefined);
});
it("reads PI_VISION_PROXY_PHASH_THRESHOLD", () => {
assert.equal(readEnvOverrides({ PI_VISION_PROXY_PHASH_THRESHOLD: "0.9" }).pHashSimilarityThreshold, 0.9);
assert.equal(readEnvOverrides({ PI_VISION_PROXY_PHASH_THRESHOLD: "1.5" }).pHashSimilarityThreshold, undefined);
});
});
describe("sanitize (1.4.0 fields)", () => {
it("defaults new fields when missing", () => {
const result = sanitize({
mode: "fallback",
provider: "anthropic",
modelId: "claude-sonnet-4-5",
systemPrompt: "test",
includeContext: true,
} as VisionConfig);
assert.equal(result.tool, "on");
assert.equal(result.maxImagesPerCall, 10);
assert.equal(result.maxBatch, 4);
assert.equal(result.cacheSize, 50);
assert.equal(result.pHashSimilarityThreshold, 0.8);
assert.ok(result.groundingModels);
});
it("validates maxImagesPerCall range", () => {
const bad = sanitize({ ...DEFAULT_CONFIG, maxImagesPerCall: 0 });
assert.equal(bad.maxImagesPerCall, 10); // reset to default
const good = sanitize({ ...DEFAULT_CONFIG, maxImagesPerCall: 15 });
assert.equal(good.maxImagesPerCall, 15);
});
});
describe("readImageFileWithReason (basename)", () => {
it("returns filename (basename)", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-test-"));
const file = join(dir, "test-image.png");
await writeFile(file, TINY_PNG);
try {
const r = await readImageFileWithReason(file);
assert.equal(r.filename, "test-image.png");
} finally {
await rm(dir, { recursive: true, force: true });
}
});
});
// ── Create a 10×10 solid-colour PNG for crop tests ────────────────────────
async function create10x10Png(): Promise {
const { Image } = await import("imagescript");
const img = new Image(10, 10);
// Fill with a solid red-ish colour so we have real pixels
for (let y = 0; y < 10; y++) {
for (let x = 0; x < 10; x++) {
img.setPixelAt(x + 1, y + 1, 0xff0000ff); // RGBA red, fully opaque
}
}
const encoded = await img.encode(1);
return Buffer.from(encoded);
}
describe("cropImage (ImageScript)", () => {
after(async () => {
await shutdownCropWorkers();
});
it("crops a 10×10 PNG to a 5×5 region", async () => {
const png = await create10x10Png();
const crop = { x: 2, y: 3, width: 5, height: 5 };
const result = await cropImage(png, crop, "image/png");
assert.ok(result, "crop should succeed");
assert.ok(result.length > 0, "result should have bytes");
// Verify the cropped image has correct dimensions
const dims = extractDimensions(result);
assert.ok(dims, "should extract dimensions from cropped image");
assert.equal(dims.width, 5);
assert.equal(dims.height, 5);
});
it("returns null for out-of-bounds crop", async () => {
const png = await create10x10Png();
const crop = { x: 8, y: 8, width: 10, height: 10 };
const result = await cropImage(png, crop, "image/png");
// ImageScript may clamp or fail — either way it shouldn't throw
// If it returns something, it should be valid
if (result) {
const dims = extractDimensions(result);
assert.ok(dims, "cropped result should be valid");
}
});
it("encodes as JPEG when mimeType is image/jpeg", async () => {
const png = await create10x10Png();
const crop = { x: 0, y: 0, width: 10, height: 10 };
const result = await cropImage(png, crop, "image/jpeg");
assert.ok(result, "crop should succeed");
// JPEG should start with FF D8
assert.equal(result[0], 0xff);
assert.equal(result[1], 0xd8);
});
it("succeeds within a generous decode timeout (env override is honoured)", async () => {
const prev = process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = "10000";
try {
const png = await create10x10Png();
const result = await cropImage(png, { x: 0, y: 0, width: 10, height: 10 }, "image/png");
assert.ok(result, "crop should succeed with a generous timeout");
} finally {
if (prev === undefined) delete process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
else process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = prev;
}
});
it("returns null (no throw) for undecodable garbage bytes", async () => {
const garbage = Buffer.from([0x89, 0x50, 0x4e, 0x47, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05]);
const result = await cropImage(garbage, { x: 0, y: 0, width: 5, height: 5 }, "image/png");
assert.equal(result, null);
});
it("falls back to the in-thread path when the worker is disabled", async () => {
const prev = process.env.PI_VISION_PROXY_DECODE_WORKER;
process.env.PI_VISION_PROXY_DECODE_WORKER = "0";
try {
const png = await create10x10Png();
const result = await cropImage(png, { x: 2, y: 3, width: 5, height: 5 }, "image/png");
assert.ok(result, "in-thread fallback should still crop");
const dims = extractDimensions(result);
assert.ok(dims && dims.width === 5 && dims.height === 5, "fallback crop should have correct dims");
} finally {
if (prev === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER;
else process.env.PI_VISION_PROXY_DECODE_WORKER = prev;
}
});
it("handles several sequential crops (worker pool reuse)", async () => {
const png = await create10x10Png();
for (let i = 0; i < 5; i++) {
const result = await cropImage(png, { x: 0, y: 0, width: 5, height: 5 }, "image/png");
assert.ok(result, `crop ${i} should succeed`);
const dims = extractDimensions(result);
assert.ok(dims && dims.width === 5 && dims.height === 5, `crop ${i} dims`);
}
});
it("works with pooling disabled (spawn-per-call)", async () => {
const prev = process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = "0";
try {
const png = await create10x10Png();
const a = await cropImage(png, { x: 0, y: 0, width: 5, height: 5 }, "image/png");
const b = await cropImage(png, { x: 1, y: 1, width: 4, height: 4 }, "image/png");
assert.ok(a && b, "both crops should succeed without pooling");
} finally {
if (prev === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
else process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = prev;
}
});
it("hard-terminates the worker on timeout (returns null, does not hang)", async () => {
// Force pool=0 so a *fresh* worker is spawned (never a warmed pooled one).
// Spinning up a thread + loading ImageScript + instantiating the WASM codec
// is reliably far slower than the 1ms timeout regardless of machine speed,
// so the main-thread timer fires and terminate()s the worker — proving the
// timeout is a hard limit, without depending on wall-clock scheduling luck.
const prevWorker = process.env.PI_VISION_PROXY_DECODE_WORKER;
const prevPool = process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
const prevTimeout = process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
process.env.PI_VISION_PROXY_DECODE_WORKER = "1";
process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = "0";
process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = "1";
try {
const png = await create10x10Png();
const started = Date.now();
const result = await cropImage(png, { x: 0, y: 0, width: 10, height: 10 }, "image/png");
assert.equal(result, null, "timed-out crop should return null");
assert.ok(Date.now() - started < 5000, "should return promptly, not hang");
} finally {
if (prevWorker === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER;
else process.env.PI_VISION_PROXY_DECODE_WORKER = prevWorker;
if (prevPool === undefined) delete process.env.PI_VISION_PROXY_DECODE_WORKER_POOL;
else process.env.PI_VISION_PROXY_DECODE_WORKER_POOL = prevPool;
if (prevTimeout === undefined) delete process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS;
else process.env.PI_VISION_PROXY_DECODE_TIMEOUT_MS = prevTimeout;
}
});
});
describe("piAiImageToBuffer / bufferToPiAiImage", () => {
it("round-trips base64 data", () => {
const original = Buffer.from("hello world");
const piAiImg = bufferToPiAiImage(original, "image/png");
assert.equal(piAiImg.type, "image");
assert.equal(piAiImg.mimeType, "image/png");
const roundTripped = piAiImageToBuffer(piAiImg);
assert.deepEqual(roundTripped, original);
});
it("defaults to image/png mimeType", () => {
const piAiImg = bufferToPiAiImage(Buffer.alloc(0));
assert.equal(piAiImg.mimeType, "image/png");
});
});
describe("computePHash", () => {
it("returns a hex hash string for a valid image", async () => {
const png = await create10x10Png();
const hash = await computePHash(png);
// imghash may or may not be available; if it is, we get a hex string
if (hash !== null) {
assert.ok(/^[0-9a-f]+$/i.test(hash), `hash should be hex: ${hash}`);
}
});
});
describe("hammingDistance", () => {
it("returns 0 for identical hashes", () => {
assert.equal(hammingDistance("0000", "0000"), 0);
});
it("returns correct distance for differing hashes", () => {
// 0 = 0000, f = 1111 → 4 bits differ per hex char
assert.equal(hammingDistance("0", "f"), 4);
// 0 = 0000, 1 = 0001 → 1 bit differs
assert.equal(hammingDistance("0", "1"), 1);
});
it("returns Infinity for null inputs", () => {
assert.equal(hammingDistance(null, "abc"), Infinity);
assert.equal(hammingDistance("abc", null), Infinity);
assert.equal(hammingDistance(null, null), Infinity);
});
it("handles unequal length hashes", () => {
// Compare only up to shorter length
const dist = hammingDistance("00", "ff00");
assert.equal(dist, 8); // only first 2 hex chars compared
});
});
import { parseDescribeArgs } from "../internal.ts";
describe("parseDescribeArgs (describe)", () => {
it("parses basic describe with single image", () => {
const result = parseDescribeArgs("/path/to/image.png");
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.deepEqual(result.images, ["/path/to/image.png"]);
assert.equal(result.save, false);
assert.equal(result.question, undefined);
assert.equal(result.model, undefined);
assert.equal(result.crops, undefined);
}
});
it("parses multiple images with --question", () => {
const result = parseDescribeArgs('img1.png img2.png --question "What is different?"');
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.deepEqual(result.images, ["img1.png", "img2.png"]);
assert.equal(result.question, "What is different?");
}
});
it("parses --crop with region form", () => {
const result = parseDescribeArgs("image.png --crop 0:r=top-right");
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.deepEqual(result.crops, [{ image_index: 0, region: "top-right" }]);
}
});
it("parses --crop with normalized form", () => {
const result = parseDescribeArgs("image.png --crop 0:n=0.1,0.2,0.5,0.6");
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.deepEqual(result.crops, [{ image_index: 0, normalized: { x: 0.1, y: 0.2, width: 0.5, height: 0.6 } }]);
}
});
it("parses --crop with pixel form", () => {
const result = parseDescribeArgs("image.png --crop 0:p=100,200,300,400");
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.deepEqual(result.crops, [{ image_index: 0, pixels: { x: 100, y: 200, width: 300, height: 400 } }]);
}
});
it("parses --save flag", () => {
const result = parseDescribeArgs("image.png --save");
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.equal(result.save, true);
}
});
it("parses --model override", () => {
const result = parseDescribeArgs("image.png --model Qwen/Qwen2.5-VL-7B");
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.equal(result.model, "Qwen/Qwen2.5-VL-7B");
}
});
it("parses full combined command", () => {
const result = parseDescribeArgs('a.png b.png --question "Compare them" --crop 0:r=center --crop 1:n=0,0,0.5,0.5 --model Qwen/Qwen2.5-VL-7B --save');
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.deepEqual(result.images, ["a.png", "b.png"]);
assert.equal(result.question, "Compare them");
assert.equal(result.save, true);
assert.equal(result.model, "Qwen/Qwen2.5-VL-7B");
assert.equal(result.crops!.length, 2);
assert.equal(result.crops![0].image_index, 0);
assert.equal(result.crops![1].image_index, 1);
}
});
it("returns error for empty input", () => {
const result = parseDescribeArgs("");
assert.equal(typeof result, "string");
assert.ok((result as string).includes("Usage"));
});
it("returns error for unknown region", () => {
const result = parseDescribeArgs("image.png --crop 0:r=invalid");
assert.equal(typeof result, "string");
assert.ok((result as string).includes("unknown region"));
});
it("returns error for bad crop form", () => {
const result = parseDescribeArgs("image.png --crop 0:bad=form");
assert.equal(typeof result, "string");
assert.ok((result as string).includes("unknown crop form"));
});
it("returns error for missing --question value", () => {
const result = parseDescribeArgs("image.png --question");
assert.equal(typeof result, "string");
assert.ok((result as string).includes("--question requires"));
});
it("returns error for unknown flag", () => {
const result = parseDescribeArgs("image.png --bogus");
assert.equal(typeof result, "string");
assert.ok((result as string).includes("unknown flag"));
});
});
describe("parseDescribeArgs (redescribe)", () => {
it("parses redescribe with single image", () => {
const result = parseDescribeArgs("image.png", true);
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.deepEqual(result.images, ["image.png"]);
assert.equal(result.save, true); // implied
}
});
it("returns error for redescribe with --question", () => {
const result = parseDescribeArgs('image.png --question "test"', true);
assert.equal(typeof result, "string");
assert.ok((result as string).includes("--question is not valid"));
});
it("returns error for redescribe with --crop", () => {
const result = parseDescribeArgs("image.png --crop 0:r=center", true);
assert.equal(typeof result, "string");
assert.ok((result as string).includes("--crop is not valid"));
});
it("returns error for redescribe with --save", () => {
const result = parseDescribeArgs("image.png --save", true);
assert.equal(typeof result, "string");
assert.ok((result as string).includes("--save is implied"));
});
it("allows --model in redescribe", () => {
const result = parseDescribeArgs("image.png --model Qwen/Qwen2.5-VL-7B", true);
assert.ok(typeof result !== "string", result as string);
if (typeof result !== "string") {
assert.equal(result.model, "Qwen/Qwen2.5-VL-7B");
assert.equal(result.save, true);
}
});
});
import {
buildJointDescriptionFence,
buildAdaptiveJointPrompt,
extractVersion,
generateFilenameHints,
} from "../internal.ts";
describe("buildJointDescriptionFence", () => {
it("builds joint fence with dimensions JSON", () => {
const metas = [
{ hash: "aaa", meta: { width: 1920, height: 1080, filename: "before.png" } },
{ hash: "bbb", meta: { width: 1920, height: 1080, filename: "after.png" } },
];
const fence = buildJointDescriptionFence(metas, "Images differ in sidebar.");
assert.ok(fence.startsWith(""));
});
it("includes grounding_format when provided", () => {
const metas = [{ hash: "abc", meta: { width: 100, height: 100 } }];
const fence = buildJointDescriptionFence(metas, "desc", "qwen_pixels");
assert.ok(fence.includes('grounding_format="qwen_pixels"'));
});
it("omits grounding_format when 'none'", () => {
const metas = [{ hash: "abc", meta: { width: 100, height: 100 } }];
const fence = buildJointDescriptionFence(metas, "desc", "none");
assert.ok(!fence.includes("grounding_format"));
});
it("handles missing meta gracefully", () => {
const metas = [{ hash: "abc" }];
const fence = buildJointDescriptionFence(metas, "desc");
assert.ok(fence.includes('"image":"abc"'));
assert.ok(!fence.includes("width"));
});
});
describe("buildAdaptiveJointPrompt", () => {
it("includes image labels and comparison instructions", () => {
const metas = [
{ hash: "a", meta: { width: 800, height: 600, filename: "img1.png" } },
{ hash: "b", meta: { width: 1024, height: 768, filename: "img2.png" } },
];
const prompt = buildAdaptiveJointPrompt(metas, "What changed?");
assert.ok(prompt.includes("2 images"));
assert.ok(prompt.includes("800x600"));
assert.ok(prompt.includes("1024x768"));
assert.ok(prompt.includes("img1.png"));
assert.ok(prompt.includes("What changed?"));
assert.ok(prompt.includes("comparison"));
});
it("includes hints when provided", () => {
const metas = [{ hash: "a" }, { hash: "b" }];
const prompt = buildAdaptiveJointPrompt(metas, "describe", ["before/after pair"]);
assert.ok(prompt.includes("before/after pair"));
assert.ok(prompt.includes("Structural hints"));
});
it("omits hint block when no hints", () => {
const metas = [{ hash: "a" }, { hash: "b" }];
const prompt = buildAdaptiveJointPrompt(metas, "describe");
assert.ok(!prompt.includes("Structural hints"));
});
});
describe("extractVersion", () => {
it("extracts v-prefixed version", () => {
const r = extractVersion("mockup_v2.png");
assert.deepEqual(r, { prefix: "mockup_v", version: 2 });
});
it("extracts decimal version", () => {
const r = extractVersion("draft_v1.2.png");
assert.deepEqual(r, { prefix: "draft_v", version: 1.2 });
});
it("extracts non-prefixed version", () => {
const r = extractVersion("app3.png");
assert.deepEqual(r, { prefix: "app", version: 3 });
});
it("returns null for no version", () => {
assert.equal(extractVersion("screenshot.png"), null);
});
it("returns null for version-only filename", () => {
assert.equal(extractVersion("3.png"), null);
});
});
describe("generateFilenameHints", () => {
it("detects before/after pair", () => {
const hints = generateFilenameHints(["before.png", "after.png"]);
assert.ok(hints.includes("before/after pair"));
});
it("detects old/new pair", () => {
const hints = generateFilenameHints(["old.png", "new.png"]);
assert.ok(hints.includes("old/new pair"));
});
it("detects versioned sequence", () => {
const hints = generateFilenameHints(["mockup_v2.png", "mockup_v4.png"]);
assert.ok(hints.some((h) => h.includes("versioned sequence")));
});
it("detects numbered underscore sequence", () => {
const hints = generateFilenameHints(["frame_1.png", "frame_2.png"]);
assert.ok(hints.includes("numbered sequence"));
});
it("detects numbered dash sequence", () => {
const hints = generateFilenameHints(["frame-1.png", "frame-2.png"]);
assert.ok(hints.includes("numbered sequence"));
});
it("detects date-ordered sequence", () => {
const hints = generateFilenameHints(["2026-05-01_mockup.png", "2026-05-03_mockup.png"]);
assert.ok(hints.includes("time-ordered sequence"));
});
it("returns empty for no pattern", () => {
const hints = generateFilenameHints(["cat.png", "dog.png"]);
assert.deepEqual(hints, []);
});
it("returns empty for single image", () => {
assert.deepEqual(generateFilenameHints(["before.png"]), []);
});
});
import {
isGroundingExcluded,
parseGroundingFormat,
VALID_GROUNDING_FORMATS,
} from "../internal.ts";
describe("isGroundingExcluded", () => {
it("excludes claude models", () => {
assert.equal(isGroundingExcluded("anthropic/claude-sonnet-4-5"), true);
});
it("excludes gpt-4o", () => {
assert.equal(isGroundingExcluded("openai/gpt-4o"), true);
});
it("excludes llama vision", () => {
assert.equal(isGroundingExcluded("meta/llama-3.2-11b-vision"), true);
});
it("allows Qwen models", () => {
assert.equal(isGroundingExcluded("Qwen/Qwen2.5-VL-7B-Instruct"), false);
});
it("allows unknown models", () => {
assert.equal(isGroundingExcluded("some/vendor-model"), false);
});
});
describe("parseGroundingFormat", () => {
it("parses valid formats", () => {
assert.equal(parseGroundingFormat("qwen_pixels"), "qwen_pixels");
assert.equal(parseGroundingFormat("molmo_points"), "molmo_points");
assert.equal(parseGroundingFormat("deepseek_bbox"), "deepseek_bbox");
assert.equal(parseGroundingFormat("internvl_pixels"), "internvl_pixels");
assert.equal(parseGroundingFormat("gemini_normalized_1000"), "gemini_normalized_1000");
});
it("returns null for invalid format", () => {
assert.equal(parseGroundingFormat("invalid"), null);
assert.equal(parseGroundingFormat("none"), null);
});
});
describe("VALID_GROUNDING_FORMATS", () => {
it("contains expected formats", () => {
assert.ok(VALID_GROUNDING_FORMATS.includes("qwen_pixels"));
assert.ok(VALID_GROUNDING_FORMATS.includes("molmo_points"));
assert.equal(VALID_GROUNDING_FORMATS.length, 5);
});
});
// ── Security-specific tests ──────────────────────────────────────────────
describe("Security: path traversal rejection", () => {
it("extractCandidateImagePaths may detect paths with .., but before_agent_start rejects them", () => {
// The regex is permissive — it may extract paths with ..
// The .. check in before_agent_start is the defense layer
const paths = extractCandidateImagePaths(
"Check this image: /tmp/../etc/shadow.png",
);
// Key point: the before_agent_start handler skips paths with ..
// This test documents that extractCandidateImagePaths itself does not filter ..
assert.ok(paths.length >= 0, "regex may or may not match — .. filtering is in the handler");
});
it("stripImagePaths escapes regex metacharacters safely", () => {
// A path containing regex metacharacters should not cause errors
const result = stripImagePaths(
"Image at /tmp/test(file).png",
["/tmp/test(file).png"],
);
assert.ok(!result.includes("/tmp/test(file).png"));
assert.ok(result.includes(IMAGE_PATH_PLACEHOLDER));
});
it("stripImagePaths handles path with $ and ^ safely", () => {
const result = stripImagePaths(
"/tmp/$test^.png",
["/tmp/$test^.png"],
);
assert.ok(!result.includes("/tmp/$test^.png"));
});
});
describe("Security: fence injection resistance", () => {
it("nested fence tags are neutralised", () => {
const malicious =
'Normal text' +
'Injected content' +
'';
const fence = buildDescriptionFence("abc", malicious);
// Count actual closing tags — should be exactly 1 (at the end)
const closings = fence.match(/<\/vision_proxy_description>/g);
assert.equal(closings?.length, 1, "should have exactly 1 closing tag");
});
it("analysis fence with mixed injection types", () => {
const malicious =
'x';
const fence = buildAnalysisFence("abc", malicious);
// fenceUntrusted neutralises ALL vision_proxy tags but not arbitrary HTML
assert.ok(!fence.includes("<"), "closing tag should be neutralised");
assert.ok(!fence.includes(""), "description tag should be neutralised");
assert.ok(!fence.includes(""), "joint opening tag should be neutralised");
});
it("fenceUntrusted handles empty string", () => {
assert.equal(fenceUntrusted(""), "");
});
it("fenceUntrusted handles non-ASCII content", () => {
const text = "描述图片中的内容 🖼️ 画像の内容を説明";
const safe = fenceUntrusted(text);
assert.equal(safe, text, "non-ASCII should pass through unchanged");
});
});
describe("Security: consent integrity", () => {
it("consent entry without provider does not satisfy per-provider check", () => {
const entries = [
{ type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true } },
];
assert.equal(hasConsent(entries, "anthropic"), false);
assert.equal(hasConsent(entries, "openai"), false);
});
it("consent entry with wrong provider does not satisfy check", () => {
const entries = [
{ type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true, provider: "anthropic" } },
];
assert.equal(hasConsent(entries, "openai"), false);
});
it("consent entry with matching provider satisfies check", () => {
const entries = [
{ type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true, provider: "anthropic" } },
];
assert.equal(hasConsent(entries, "anthropic"), true);
});
it("most recent consent entry wins", () => {
const entries = [
{ type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: true, provider: "anthropic" } },
{ type: "custom", customType: CUSTOM_TYPE_CONSENT, data: { granted: false } },
];
assert.equal(hasConsent(entries, "anthropic"), false);
});
});
describe("Security: config sanitization", () => {
it("rejects prototype-polluting keys from file config", () => {
const cfg = sanitize({ ...DEFAULT_CONFIG, "__proto__": { admin: true } } as any);
assert.equal(({} as any).admin, undefined);
assert.equal(cfg.mode, "fallback"); // still valid
});
it("rejects invalid provider strings", () => {
const cfg = sanitize({ ...DEFAULT_CONFIG, provider: "../../evil" });
assert.equal(cfg.provider, DEFAULT_CONFIG.provider); // reset to default
});
it("rejects invalid modelId strings", () => {
const cfg = sanitize({ ...DEFAULT_CONFIG, modelId: "model; rm -rf /" });
assert.equal(cfg.modelId, DEFAULT_CONFIG.modelId); // reset to default
});
it("clamps out-of-range numeric values", () => {
const cfg = sanitize({ ...DEFAULT_CONFIG, maxImagesPerCall: 9999, maxBatch: -1, cacheSize: 1e6 });
assert.equal(cfg.maxImagesPerCall, DEFAULT_CONFIG.maxImagesPerCall);
assert.equal(cfg.maxBatch, DEFAULT_CONFIG.maxBatch);
assert.equal(cfg.cacheSize, DEFAULT_CONFIG.cacheSize);
});
});
describe("Security: attribute escaping", () => {
it("escapeAttr handles all XML-special characters", () => {
assert.equal(escapeAttr(''), "<script>alert("xss")</script>");
assert.equal(escapeAttr("a&b"), "a&b");
});
it("escapeAttr handles empty string", () => {
assert.equal(escapeAttr(""), "");
});
it("escapeAttr neutralises null bytes (SEC-6)", () => {
assert.equal(escapeAttr("before\x00after"), "before\uFFFDafter");
assert.equal(escapeAttr("\x00"), "\uFFFD");
// Null bytes in filename attribute context
const fence = buildDescriptionFence("abc", "desc", { width: 1, height: 1, filename: "test\x00evil.png" });
assert.ok(!fence.includes("\x00"), "fence should contain no null bytes");
assert.ok(fence.includes("\uFFFD"), "null byte should be replaced with replacement char");
});
});
describe("Security: telemetry sanitization (SEC-3)", () => {
it("sanitizeForLog strips control characters", () => {
assert.equal(sanitizeForLog("hello\x00world"), "helloworld");
assert.equal(sanitizeForLog("bell\x07ring"), "bellring");
assert.equal(sanitizeForLog("normal text"), "normal text");
// Tab, LF, CR are safe and preserved
assert.equal(sanitizeForLog("tab\there"), "tab\there");
assert.equal(sanitizeForLog("line\nbreak"), "line\nbreak");
});
it("sanitizeForLog enforces length limit", () => {
const long = "a".repeat(500);
assert.equal(sanitizeForLog(long).length, 200);
assert.equal(sanitizeForLog(long, 50).length, 50);
});
it("sanitizeForLog preserves Unicode", () => {
const text = "描述 🖼️ 画像";
assert.equal(sanitizeForLog(text), text);
});
it("sanitizeForLog handles empty string", () => {
assert.equal(sanitizeForLog(""), "");
});
});
describe("Security: persistent config key filtering (SEC-4)", () => {
it("readPersistentFile filters unknown keys", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-cfg-sec-"));
try {
const malicious = JSON.stringify({
mode: "always",
__proto__: { admin: true },
unknownKey: "should be removed",
provider: "anthropic",
});
await writeFile(join(dir, "vision-proxy.json"), malicious);
const result = await readPersistentFile(dir) as any;
assert.equal(result.mode, "always");
assert.equal(result.provider, "anthropic");
assert.equal(result.unknownKey, undefined, "unknown key should be filtered");
// Check own properties only — constructor is inherited from Object.prototype
assert.ok(!Object.keys(result).includes("constructor"), "constructor should not be an own property");
assert.ok(!Object.keys(result).includes("__proto__"), "__proto__ should not be an own property");
// Verify prototype is not polluted
assert.equal(({} as any).admin, undefined);
} finally {
await rm(dir, { recursive: true });
}
});
it("readPersistentFile handles invalid JSON", async () => {
const dir = await mkdtemp(join(os.tmpdir(), "vp-cfg-inv-"));
try {
await writeFile(join(dir, "vision-proxy.json"), "not json at all");
const result = await readPersistentFile(dir);
assert.deepEqual(result, {});
} finally {
await rm(dir, { recursive: true });
}
});
});
describe("Security: image decode bomb protection", () => {
it("storeImageMeta rejects dimensions exceeding MAX_IMAGE_DIMENSION", async () => {
// Can't easily create a real 16K×16K image, but we can test the path
// by verifying that normal images are accepted
const { Image } = await import("imagescript");
const img = new Image(100, 100);
const encoded = Buffer.from(await img.encode(1));
const hash = "test-decode-bomb-normal";
const store = createImageMetaStore();
storeImageMeta(store, hash, encoded);
const meta = store.get(hash);
// Normal image should be accepted
assert.ok(meta, "normal image should be stored");
});
});
describe("Review fixes: hasConsent per-provider semantics", () => {
it("revoking consent for provider A does not affect provider B", () => {
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "anthropic" }),
customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "google" }),
customEntry(CUSTOM_TYPE_CONSENT, { granted: false, provider: "anthropic" }),
];
assert.equal(hasConsent(entries, "anthropic"), false, "anthropic should be revoked");
assert.equal(hasConsent(entries, "google"), true, "google should still be granted");
});
it("provider-less revoked consent blocks all providers", () => {
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_CONSENT, { granted: true, provider: "anthropic" }),
customEntry(CUSTOM_TYPE_CONSENT, { granted: false }), // global revoke
];
assert.equal(hasConsent(entries, "anthropic"), false);
assert.equal(hasConsent(entries, "google"), false);
});
it("provider-less granted does not satisfy per-provider check", () => {
const entries: Entry[] = [
customEntry(CUSTOM_TYPE_CONSENT, { granted: true }),
];
assert.equal(hasConsent(entries, "anthropic"), false, "global grant should not satisfy per-provider");
assert.equal(hasConsent(entries), true, "global check should see the grant");
});
});
describe("Review fixes: grounding format validation in sanitize()", () => {
it("strips invalid grounding format values", () => {
const config = {
...DEFAULT_CONFIG,
groundingModels: {
"test/model": { format: "invalid_format" },
"anthropic/claude-sonnet-4-5": { format: "qwen_pixels" },
},
};
const safe = sanitize(config);
assert.equal((safe.groundingModels as any)["test/model"], undefined, "invalid format should be stripped");
assert.equal((safe.groundingModels as any)["anthropic/claude-sonnet-4-5"].format, "qwen_pixels");
});
it("preserves valid formats", () => {
const config = {
...DEFAULT_CONFIG,
groundingModels: {
"test/model": { format: "molmo_points" },
},
};
const safe = sanitize(config);
assert.equal((safe.groundingModels as any)["test/model"].format, "molmo_points");
});
});
describe("Review fixes: buildAdaptiveJointPrompt sanitizes userPrompt", () => {
it("escapes XML-breaking characters in user_message", () => {
const prompt = buildAdaptiveJointPrompt(
[{ hash: "abc", meta: { width: 100, height: 200 } }],
"Hello injected",
);
assert.ok(prompt.includes("</user_message>"), "closing tag should be escaped");
assert.ok(!prompt.includes(""), "raw tags should be escaped");
});
});
describe("Review fixes: buildJointDescriptionFence dimensions escaping", () => {
it("escapes special chars in dimensions attribute", () => {
const fence = buildJointDescriptionFence(
[{ hash: "abc", meta: { width: 100, height: 200, filename: "test's file & .png" } }],
"desc",
);
// Inside the single-quoted JSON attribute, & < > ' must be escaped
assert.ok(!fence.includes("test's"), "single quote should be escaped");
assert.ok(fence.includes("'"), "should contain escaped single quote");
assert.ok(fence.includes("&"), "should contain escaped ampersand");
});
});
describe("Review fixes: storeImageMeta filename backfill", () => {
it("backfills filename on second call without overwriting dimensions", async () => {
const { Image } = await import("imagescript");
const img = new Image(50, 60);
const encoded = Buffer.from(await img.encode(1));
const hash = "test-backfill-filename";
const store = createImageMetaStore();
storeImageMeta(store, hash, encoded); // first call, no filename
storeImageMeta(store, hash, encoded, "photo.png"); // second call, with filename
const meta = store.get(hash);
assert.ok(meta, "meta should exist");
assert.equal(meta!.filename, "photo.png", "filename should be backfilled");
});
it("keeps stores isolated — one session's metadata does not leak into another (issue #12)", async () => {
const { Image } = await import("imagescript");
const encoded = Buffer.from(await new Image(40, 30).encode(1));
const sessionA = createImageMetaStore();
const sessionB = createImageMetaStore();
storeImageMeta(sessionA, "shared-hash", encoded, "a.png");
assert.ok(sessionA.get("shared-hash"), "session A should have the metadata");
assert.equal(sessionB.get("shared-hash"), undefined, "session B must not inherit it");
});
});