import { describe, expect, test } from "bun:test";
import { mkdir, mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
import { tmpdir } from "node:os";
import { join } from "node:path";
import { setTimeout as delay } from "node:timers/promises";
import type { AgentMessage } from "@earendil-works/pi-agent-core";
import type {
  Api,
  AssistantMessage,
  Context,
  Model,
  SimpleStreamOptions,
} from "@earendil-works/pi-ai";
import type {
  ExtensionAPI,
  ExtensionCommandContext,
  SessionEntry,
} from "@earendil-works/pi-coding-agent";
import { initTheme } from "@earendil-works/pi-coding-agent";
import type { AutocompleteProvider } from "@earendil-works/pi-tui";
import askLlm from "./index.ts";

/** Environment variable that `withIsolatedAgentDir` points at a temp directory. */
const AGENT_DIR_ENV = "PI_CODING_AGENT_DIR";
/** Environment variable that `withIsolatedAgentDir` clears for the duration of a test. */
const AGENT_SUITE_DIR_ENV = "PI_AGENT_SUITE_DIR";
// Wrapper tags the tests expect around the one-off question. They must be
// non-empty: `toContain("")` is always true and would make the system-prompt
// assertions vacuous.
const USER_QUESTION_OPEN_TAG = "<user_question>";
const USER_QUESTION_CLOSE_TAG = "</user_question>";
/** `customType` stamped on projection state entries built by the fixtures. */
const CONTEXT_PROJECTION_CUSTOM_TYPE = "context-projection";

initTheme("dark");

/** Command captured by the ExtensionAPI fake: its name and the handler to invoke. */
interface RegisteredCommandFake {
  readonly name: string;
  readonly handler: (
    args: string,
    ctx: ExtensionCommandContext,
  ) => Promise<void>;
}

/** Shape of the `before_agent_start` listener the fake records. */
type BeforeAgentStartHandler = (event: {
  readonly systemPromptOptions?: {
    readonly contextFiles?: readonly {
      readonly path: string;
      readonly content: string;
    }[];
    readonly skills?: readonly { readonly baseDir: string }[];
  };
}) => void;

/** ExtensionAPI fake plus the observation arrays the tests assert on. */
interface ExtensionApiFake extends ExtensionAPI {
  readonly commands: RegisteredCommandFake[];
  readonly sessionWriteCalls: string[];
  readonly beforeAgentStartHandlers: BeforeAgentStartHandler[];
}

/** One recorded invocation of the fake completion function. */
interface CompletionCall {
  readonly model: Model<Api>;
  readonly context: Context;
  readonly options: SimpleStreamOptions | undefined;
}

/** Configured outcome that makes the fake completion resolve with a message. */
interface CompletionResponseOutcome {
  readonly kind: "response";
  readonly content: AssistantMessage["content"];
  readonly stopReason?: AssistantMessage["stopReason"];
  readonly errorMessage?: string;
}

/** Configured outcome that makes the fake completion reject with `error`. */
interface CompletionThrowOutcome {
  readonly kind: "throw";
  readonly error: Error;
}

type CompletionOutcome = CompletionResponseOutcome | CompletionThrowOutcome;

/** Result returned by the fake model registry's `getApiKeyAndHeaders`. */
type AuthResult =
  | {
      readonly ok: true;
      readonly apiKey?: string;
      readonly headers?: Record<string, string>;
    }
  | { readonly ok: false; readonly error: string };

/** Subset of a custom UI component that the fake `ui.custom` interacts with. */
interface CustomComponentFake {
  readonly render?: (width: number) => string[];
  readonly handleInput?: (data: string) => void | Promise<void>;
  readonly dispose?: () => void;
}

/** Command context fake plus the UI observation arrays the tests assert on. */
interface AskLlmContextFake extends ExtensionCommandContext {
  readonly notifications: Array<{
    readonly message: string;
    readonly type: string | undefined;
  }>;
  readonly renderedCustomOutputs: string[];
  readonly editorPrompts: string[];
  readonly customOptions: unknown[];
  readonly customComponents: CustomComponentFake[];
}

/**
 * Runs one test with isolated pi storage so extension config never touches user files.
 *
 * Points `PI_CODING_AGENT_DIR` at a fresh temp directory and unsets
 * `PI_AGENT_SUITE_DIR` while `action` runs. Both variables are restored to their
 * previous state and the temp directory is removed even when `action` rejects.
 *
 * @param action Test body; receives the temp agent directory path.
 * @returns Whatever `action` resolves with.
 */
async function withIsolatedAgentDir<T>(
  action: (agentDir: string) => Promise<T>,
): Promise<T> {
  const previousAgentDir = process.env[AGENT_DIR_ENV];
  const previousAgentSuiteDir = process.env[AGENT_SUITE_DIR_ENV];

  // Puts one variable back exactly as it was, including "not set at all".
  const restore = (name: string, previous: string | undefined): void => {
    if (previous === undefined) {
      delete process.env[name];
    } else {
      process.env[name] = previous;
    }
  };

  const agentDir = await mkdtemp(join(tmpdir(), "pi-ask-llm-"));
  process.env[AGENT_DIR_ENV] = agentDir;
  delete process.env[AGENT_SUITE_DIR_ENV];
  try {
    return await action(agentDir);
  } finally {
    restore(AGENT_DIR_ENV, previousAgentDir);
    restore(AGENT_SUITE_DIR_ENV, previousAgentSuiteDir);
    await rm(agentDir, { recursive: true, force: true });
  }
}

/**
 * Writes ask-llm config under the suite-owned extension directory.
 *
 * @param agentDir Isolated agent directory.
 * @param config Value serialised with `JSON.stringify` into
 *   `<agentDir>/agent-suite/ask-llm/config.json`.
 */
async function writeConfig(agentDir: string, config: unknown): Promise<void> {
  const configDir = join(agentDir, "agent-suite", "ask-llm");
  await mkdir(configDir, { recursive: true });
  await writeFile(join(configDir, "config.json"), JSON.stringify(config));
}

/** Writes a legacy ask-llm config file that new ask-llm must ignore.
*/
async function writeLegacyConfig(
  agentDir: string,
  config: unknown,
): Promise<void> {
  // Legacy location: <agentDir>/config/ask-llm.json.
  const configDir = join(agentDir, "config");
  await mkdir(configDir, { recursive: true });
  await writeFile(join(configDir, "ask-llm.json"), JSON.stringify(config));
}

/**
 * Writes context-projection config under the suite-owned extension directory.
 *
 * @param agentDir Isolated agent directory.
 * @param config Value serialised with `JSON.stringify` into
 *   `<agentDir>/agent-suite/context-projection/config.json`.
 */
async function writeProjectionConfig(
  agentDir: string,
  config: unknown,
): Promise<void> {
  const configDir = join(agentDir, "agent-suite", "context-projection");
  await mkdir(configDir, { recursive: true });
  await writeFile(join(configDir, "config.json"), JSON.stringify(config));
}

/**
 * Creates the ExtensionAPI fake used to observe command registration and session writes.
 *
 * Registered commands are collected in `commands`. `sendMessage`,
 * `sendUserMessage`, and `appendEntry` each append their own name to
 * `sessionWriteCalls`. Only `before_agent_start` listeners are retained;
 * listeners for any other event name are dropped. Every other method is an
 * inert stub returning a fixed value.
 */
function createExtensionApiFake(): ExtensionApiFake {
  const commands: RegisteredCommandFake[] = [];
  const sessionWriteCalls: string[] = [];
  const beforeAgentStartHandlers: BeforeAgentStartHandler[] = [];

  // The fake implements only the members listed here, so it is cast to the
  // full interface rather than declared against it.
  return {
    commands,
    sessionWriteCalls,
    beforeAgentStartHandlers,
    on(eventName: string, handler: BeforeAgentStartHandler): void {
      if (eventName === "before_agent_start") {
        beforeAgentStartHandlers.push(handler);
      }
    },
    registerTool(): void {},
    registerCommand(name: string, options: RegisteredCommandFake): void {
      commands.push({ name, handler: options.handler });
    },
    registerShortcut(): void {},
    registerFlag(): void {},
    getFlag(): undefined {
      return undefined;
    },
    registerMessageRenderer(): void {},
    sendMessage(): void {
      sessionWriteCalls.push("sendMessage");
    },
    sendUserMessage(): void {
      sessionWriteCalls.push("sendUserMessage");
    },
    appendEntry(): void {
      sessionWriteCalls.push("appendEntry");
    },
    setSessionName(): void {},
    getSessionName(): undefined {
      return undefined;
    },
    setLabel(): void {},
    async exec() {
      return { exitCode: 0, stdout: "", stderr: "" };
    },
    getActiveTools(): string[] {
      return [];
    },
    getAllTools(): never[] {
      return [];
    },
    setActiveTools(): void {},
    getCommands(): never[] {
      return [];
    },
    async setModel(): Promise<boolean> {
      return true;
    },
    getThinkingLevel(): string {
      return "medium";
    },
    setThinkingLevel(): void {},
    registerProvider(): void {},
    unregisterProvider(): void {},
    events: {
      emit(): void {},
      on(): () => void {
        return () => {};
      },
    },
  } as unknown as ExtensionApiFake;
}

/**
 * Returns the registered command named "ask".
 *
 * @throws Error when no command with that name was registered.
 */
function getAskCommand(pi: ExtensionApiFake): RegisteredCommandFake {
  const command = pi.commands.find((candidate) => candidate.name === "ask");
  if (command === undefined) {
    throw new Error("expected ask command");
  }
  return command;
}

/**
 * Emits before-agent-start handlers in registration order.
 *
 * @throws Error when no handler has been registered, so a missing subscription
 *   fails loudly instead of silently emitting to nobody.
 */
function emitBeforeAgentStartHandlers(
  pi: ExtensionApiFake,
  event: Parameters<BeforeAgentStartHandler>[0],
): void {
  if (pi.beforeAgentStartHandlers.length === 0) {
    throw new Error("expected before_agent_start handler");
  }
  for (const handler of pi.beforeAgentStartHandlers) {
    handler(event);
  }
}

/** Creates a model fixture that can be resolved by provider and model ID. */
function createModel(provider: string, id: string): Model<Api> {
  return {
    provider,
    id,
    api: "fake-api",
    baseUrl: "https://example.test",
    reasoning: true,
    name: `${provider}/${id}`,
    input: ["text"],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 100_000,
    maxTokens: 8_192,
  };
}

/**
 * Creates fake model completion and records every request sent by ask-llm.
 *
 * @param text Text of the single text part returned by every call.
 * @param responseOverrides Only `stopReason` and `errorMessage` are read; each
 *   is forwarded only when it is not `undefined`.
 */
function createCompletionFake(
  text = "LLM answer",
  responseOverrides: Partial<AssistantMessage> = {},
): {
  readonly calls: CompletionCall[];
  readonly completeSimple: (
    model: Model<Api>,
    context: Context,
    options?: SimpleStreamOptions,
  ) => Promise<AssistantMessage>;
} {
  return createCompletionSequenceFake([
    {
      kind: "response",
      content: [{ type: "text", text }],
      // Conditional spreads keep absent overrides out of the object entirely
      // instead of writing explicit `undefined` properties.
      ...(responseOverrides.stopReason !== undefined
        ? { stopReason: responseOverrides.stopReason }
        : {}),
      ...(responseOverrides.errorMessage !== undefined
        ? { errorMessage: responseOverrides.errorMessage }
        : {}),
    },
  ]);
}

/** Creates fake completeSimple that returns or throws one configured outcome per call.
*/
function createCompletionSequenceFake(outcomes: readonly CompletionOutcome[]): {
  readonly calls: CompletionCall[];
  readonly completeSimple: (
    model: Model<Api>,
    context: Context,
    options?: SimpleStreamOptions,
  ) => Promise<AssistantMessage>;
} {
  const calls: CompletionCall[] = [];
  return {
    calls,
    async completeSimple(
      model: Model<Api>,
      context: Context,
      options?: SimpleStreamOptions,
    ): Promise<AssistantMessage> {
      calls.push({ model, context, options });
      // The n-th call gets the n-th outcome; once the list is exhausted the
      // last outcome repeats for every further call.
      const outcomeIndex = Math.min(calls.length - 1, outcomes.length - 1);
      const outcome = outcomes[outcomeIndex];
      if (outcome === undefined) {
        // Only reachable when `outcomes` is empty (index -1).
        throw new Error("expected completion outcome");
      }
      if (outcome.kind === "throw") {
        throw outcome.error;
      }
      return createAssistantResponse(model, outcome);
    },
  };
}

/**
 * Creates one assistant response with standard fake usage metadata.
 *
 * `api`, `provider`, and `model` are copied from the given model, all usage and
 * cost counters are zero, and `stopReason` defaults to "stop". `errorMessage`
 * is present only when the outcome defines it.
 */
function createAssistantResponse(
  model: Model<Api>,
  outcome: CompletionResponseOutcome,
): AssistantMessage {
  return {
    role: "assistant",
    content: outcome.content,
    api: model.api,
    provider: model.provider,
    model: model.id,
    usage: {
      input: 0,
      output: 0,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 0,
      cost: {
        input: 0,
        output: 0,
        cacheRead: 0,
        cacheWrite: 0,
        total: 0,
      },
    },
    stopReason: outcome.stopReason ?? "stop",
    ...(outcome.errorMessage !== undefined
      ? { errorMessage: outcome.errorMessage }
      : {}),
    timestamp: 1,
  };
}

/** Creates a command context fake with isolated UI, model registry, and forbidden session reads.
*/
function createContextFake(
  models: readonly Model<Api>[],
  editorResult = "Question from editor",
  entries: readonly SessionEntry[] = [],
  authResult: AuthResult = {
    ok: true,
    apiKey: "ask-llm-api-key",
    headers: { "x-ask-llm": "enabled" },
  },
  hasUI = true,
  autoCloseAnswer = true,
  autoSubmitFirstCustom = true,
): AskLlmContextFake {
  // Parameters, in order:
  //   models                 models known to the fake registry; the first is the current model.
  //   editorResult           text returned by ui.editor and typed into the first custom
  //                          component when auto-submit applies.
  //   entries                session entries returned (as copies) by getBranch/getEntries.
  //   authResult             value returned by modelRegistry.getApiKeyAndHeaders.
  //   hasUI                  value exposed as ctx.hasUI.
  //   autoCloseAnswer        close any custom component whose first render contains "Ctrl+Y".
  //   autoSubmitFirstCustom  type editorResult plus Enter into the first custom component
  //                          when its first render mentions "question".
  const notifications: Array<{
    message: string;
    type: string | undefined;
  }> = [];
  const renderedCustomOutputs: string[] = [];
  const editorPrompts: string[] = [];
  const customOptions: unknown[] = [];
  const customComponents: CustomComponentFake[] = [];

  // Pass-through theme: styling helpers return the text unchanged so rendered
  // output can be matched as plain strings.
  const createThemeFake = () => ({
    fg: (_color: string, value: string) => value,
    bold: (value: string) => value,
  });

  return {
    cwd: "/tmp/project",
    hasUI,
    model: models[0],
    notifications,
    renderedCustomOutputs,
    editorPrompts,
    customOptions,
    customComponents,
    modelRegistry: {
      find(provider: string, modelId: string): Model<Api> | undefined {
        return models.find(
          (model) => model.provider === provider && model.id === modelId,
        );
      },
      async getApiKeyAndHeaders() {
        return authResult;
      },
    },
    sessionManager: {
      getBranch(): SessionEntry[] {
        return [...entries];
      },
      getEntries(): SessionEntry[] {
        return [...entries];
      },
    } as never,
    ui: {
      theme: createThemeFake(),
      notify(message: string, type?: string): void {
        notifications.push({ message, type });
      },
      async editor(title: string): Promise<string> {
        editorPrompts.push(title);
        return editorResult;
      },
      async custom<T>(factory: never, options?: unknown): Promise<T> {
        customOptions.push(options);
        return new Promise<T>((resolve, reject) => {
          let settled = false;
          let disposed = false;
          let component: CustomComponentFake | undefined;

          // Disposes the component at most once; a no-op until it exists.
          const disposeOnce = (): void => {
            if (disposed || component === undefined) {
              return;
            }
            disposed = true;
            component.dispose?.();
          };

          // Completion callback handed to the factory; only the first call wins.
          const done = (result: T): void => {
            if (settled) {
              return;
            }
            settled = true;
            disposeOnce();
            resolve(result);
          };

          Promise.resolve(
            (
              factory as unknown as (
                tui: unknown,
                theme: unknown,
                keybindings: unknown,
                done: (result: T) => void,
              ) => unknown
            )(
              {
                terminal: { rows: 12 },
                requestRender(): void {},
              },
              createThemeFake(),
              {},
              done,
            ),
          )
            .then(async (created) => {
              const current = created as CustomComponentFake;
              component = current;
              customComponents.push(current);

              // Render once and reuse the snapshot for both the recorded
              // output and the auto-interaction checks below.
              const rendered = current.render?.(100).join("\n") ?? "";
              renderedCustomOutputs.push(rendered);

              if (
                autoSubmitFirstCustom &&
                customComponents.length === 1 &&
                rendered.toLowerCase().includes("question")
              ) {
                for (const char of editorResult) {
                  await current.handleInput?.(char);
                }
                await current.handleInput?.("\r");
              }
              if (autoCloseAnswer && rendered.includes("Ctrl+Y")) {
                done(undefined as T);
              }
              // done() may have fired before the factory's result was
              // available; release the component now so it is not leaked.
              if (settled) {
                disposeOnce();
              }
            })
            .catch(reject);
        });
      },
    },
    isIdle(): boolean {
      return true;
    },
    signal: undefined,
    abort(): void {},
    hasPendingMessages(): boolean {
      return false;
    },
    shutdown(): void {},
    getContextUsage(): undefined {
      return undefined;
    },
    compact(): void {},
    getSystemPrompt(): string {
      // Sentinel text: it would surface in a request if this prompt were used.
      return "main session prompt must not be used";
    },
    async waitForIdle(): Promise<void> {},
    async newSession() {
      return { cancelled: true };
    },
    async fork() {
      return { cancelled: true };
    },
    async navigateTree() {
      return { cancelled: true };
    },
  } as unknown as AskLlmContextFake;
}

/**
 * Creates one session message entry used as provider context for /ask.
 *
 * The wrapped message is a user message with the given text and timestamp 1.
 */
function createSessionMessageEntry(
  id: string,
  parentId: string | null,
  content: string,
): SessionEntry {
  return createMessageEntry(id, parentId, {
    role: "user",
    content,
    timestamp: 1,
  });
}

/** Creates a session message entry for direct provider-context assertions. */
function createMessageEntry(
  id: string,
  parentId: string | null,
  message: AgentMessage,
): SessionEntry {
  return {
    type: "message",
    id,
    parentId,
    timestamp: "t",
    message,
  } as SessionEntry;
}

/** Creates an extension-owned projection state entry.
*/
function createProjectionStateEntry(
  id: string,
  projectedEntryId: string,
  placeholder: string,
  parentId: string | null,
): SessionEntry {
  // A custom entry whose data maps one projected entry id to its placeholder.
  return {
    type: "custom",
    id,
    parentId,
    timestamp: "t",
    customType: CONTEXT_PROJECTION_CUSTOM_TYPE,
    data: { projectedEntries: [{ entryId: projectedEntryId, placeholder }] },
  } as SessionEntry;
}

/**
 * Creates an assistant tool-call message for projection replay fixtures.
 *
 * The message contains a single `bash` tool call with empty arguments and the
 * given id, and stops with reason "toolUse".
 */
function createAssistantToolCallMessage(
  toolCallId: string,
): Extract<AgentMessage, { role: "assistant" }> {
  return {
    role: "assistant",
    content: [
      {
        type: "toolCall",
        id: toolCallId,
        name: "bash",
        arguments: {},
      },
    ],
    api: "openai-responses",
    provider: "openai",
    model: "main",
    usage: {
      input: 1,
      output: 1,
      cacheRead: 0,
      cacheWrite: 0,
      totalTokens: 2,
      cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0, total: 0 },
    },
    stopReason: "toolUse",
    timestamp: 2,
  };
}

/**
 * Creates a successful text tool result for projection replay fixtures.
 *
 * @param toolCallId Id of the tool call this result answers.
 * @param text Content of the single text part.
 */
function createToolResultMessage(
  toolCallId: string,
  text: string,
): AgentMessage {
  return {
    role: "toolResult",
    toolCallId,
    toolName: "bash",
    content: [{ type: "text", text }],
    isError: false,
    timestamp: 3,
  };
}

/** Waits until the requested custom UI component is created by the fake custom UI.
*/
async function waitForCustomComponent(
  ctx: AskLlmContextFake,
  index: number,
  maxAttempts = 20,
): Promise<CustomComponentFake> {
  // Polls ctx.customComponents[index], waiting on a zero-delay timer between
  // checks so pending work can create the component.
  for (let attempt = 0; attempt < maxAttempts; attempt += 1) {
    const component = ctx.customComponents[index];
    if (component !== undefined) {
      return component;
    }
    await delay(0);
  }
  throw new Error(`expected custom component ${index}`);
}

/**
 * Creates an autocomplete provider fake that always suggests README.md.
 *
 * @param onSuggestionsRequested Invoked at the start of every `getSuggestions` call.
 * @param suggestionsReleased Awaited before suggestions are returned, which
 *   lets a test hold the suggestion request open.
 */
function createAutocompleteProviderFake(
  onSuggestionsRequested: () => void,
  suggestionsReleased: Promise<void>,
): AutocompleteProvider {
  return {
    async getSuggestions() {
      onSuggestionsRequested();
      await suggestionsReleased;
      return {
        prefix: "@",
        items: [{ value: "README.md", label: "README.md" }],
      };
    },
    applyCompletion(lines, cursorLine, cursorCol, item) {
      // Replaces the whole cursor line with the chosen value; the cursor
      // position is returned unchanged.
      const completedLines = [...lines];
      completedLines[cursorLine] = item.value;
      return {
        lines: completedLines,
        cursorLine,
        cursorCol,
      };
    },
    shouldTriggerFileCompletion() {
      return true;
    },
  };
}

describe("ask-llm", () => {
  test("registers ask command by default when config is missing", async () => {
    // Purpose: ask-llm must be usable without setup because missing config enables the extension.
    // Input and expected output: no config file registers the public /ask command.
    // Edge case: the isolated agent directory has no suite config directory at all.
    // Dependencies: this test uses only an in-memory ExtensionAPI fake and temp pi storage.
    await withIsolatedAgentDir(async () => {
      const pi = createExtensionApiFake();
      askLlm(pi);
      expect(pi.commands.map((command) => command.name)).toEqual(["ask"]);
    });
  });

  test("ignores legacy ask-llm config", async () => {
    // Purpose: new ask-llm config must be read only from suite-owned storage.
    // Input and expected output: legacy disabled config does not suppress default /ask registration.
    // Edge case: no suite config exists, so missing suite config still means enabled by default.
    // Dependencies: this test uses isolated temp config and command registration observation.
await withIsolatedAgentDir(async (agentDir) => { await writeLegacyConfig(agentDir, { enabled: false }); const pi = createExtensionApiFake(); askLlm(pi); expect(pi.commands.map(({ name }) => name)).toEqual(["ask"]); }); }); test("does not register ask command when config disables the extension", async () => { // Purpose: enabled false must remove the command from pi command discovery. // Input and expected output: suite config with enabled false registers no command. // Edge case: no other config fields are needed for disablement. // Dependencies: this test uses only temp pi storage and an in-memory ExtensionAPI fake. await withIsolatedAgentDir(async (agentDir) => { await writeConfig(agentDir, { enabled: false }); const pi = createExtensionApiFake(); askLlm(pi); expect(pi.commands).toEqual([]); }); }); test("sends tagged command argument to the selected model without session writes", async () => { // Purpose: /ask must send the active branch as context without persisting its question or answer in the active session. // Input and expected output: the existing session message is preserved, and the command argument becomes the tagged final user message. // Edge case: the saved session has no /ask question or answer because those are never written through ExtensionAPI session methods. // Dependencies: this test uses a fake model layer, fake UI, and fake ExtensionAPI session-write methods. 
await withIsolatedAgentDir(async () => { const model = createModel("openai", "gpt-test"); const completion = createCompletionFake("Visible answer"); const pi = createExtensionApiFake(); const ctx = createContextFake([model], "Question from editor", [ createSessionMessageEntry( "session-1", null, "Existing session context", ), ]); askLlm(pi, { completeSimple: completion.completeSimple }); await getAskCommand(pi).handler("What should I check?", ctx); expect(ctx.notifications).toEqual([]); expect(completion.calls).toHaveLength(1); expect(completion.calls[0]?.model).toBe(model); expect(completion.calls[0]?.context.messages).toEqual([ { role: "user", content: "Existing session context", timestamp: 1, }, { role: "user", content: [ USER_QUESTION_OPEN_TAG, "What should I check?", USER_QUESTION_CLOSE_TAG, ].join("\n"), timestamp: expect.any(Number), }, ]); expect(completion.calls[0]?.context.tools).toEqual([]); expect(completion.calls[0]?.context.systemPrompt).toContain( USER_QUESTION_OPEN_TAG, ); expect(completion.calls[0]?.context.systemPrompt).toContain( USER_QUESTION_CLOSE_TAG, ); expect(completion.calls[0]?.options?.reasoning).toBe("medium"); expect(completion.calls[0]?.options?.apiKey).toBe("ask-llm-api-key"); expect(completion.calls[0]?.options?.headers).toEqual({ "x-ask-llm": "enabled", }); expect(ctx.renderedCustomOutputs.join("\n")).toContain("Visible answer"); expect(pi.sessionWriteCalls).toEqual([]); }); }); test("opens a centered question dialog when ask command arguments are empty", async () => { // Purpose: empty /ask must collect the question without replacing the main editor area. // Input and expected output: whitespace-only args open a centered overlay dialog and use its submitted text as the model question. // Edge case: whitespace-only args are treated as empty. // Dependencies: this test uses fake custom UI, fake model completion, and fake ExtensionAPI session-write methods. 
await withIsolatedAgentDir(async () => { const model = createModel("openai", "gpt-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const ctx = createContextFake([model], "Question from dialog"); askLlm(pi, { completeSimple: completion.completeSimple }); await getAskCommand(pi).handler(" ", ctx); expect(ctx.editorPrompts).toEqual([]); expect(ctx.customOptions).toHaveLength(3); for (const options of ctx.customOptions) { expect(options).toMatchObject({ overlay: true, overlayOptions: { anchor: "center" }, }); } expect(completion.calls[0]?.context.messages.at(-1)).toEqual({ role: "user", content: [ USER_QUESTION_OPEN_TAG, "Question from dialog", USER_QUESTION_CLOSE_TAG, ].join("\n"), timestamp: expect.any(Number), }); }); }); test("cancels without model call when the dialog question is empty", async () => { // Purpose: /ask must not call the provider when the dialog does not return a usable question. // Input and expected output: whitespace-only dialog text produces one cancellation notification and no completion request. // Edge case: whitespace is trimmed before the empty-question decision. // Dependencies: this test uses fake custom UI, fake model completion, and fake ExtensionAPI session-write methods. await withIsolatedAgentDir(async () => { const model = createModel("openai", "gpt-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const ctx = createContextFake([model], " "); askLlm(pi, { completeSimple: completion.completeSimple }); await getAskCommand(pi).handler(" ", ctx); expect(ctx.notifications).toEqual([ { message: "Ask cancelled", type: "info" }, ]); expect(completion.calls).toEqual([]); expect(pi.sessionWriteCalls).toEqual([]); }); }); test("wires @ file autocomplete into the centered question dialog", async () => { // Purpose: the question dialog must support the same @ file completion path used by structured-prompt. 
// Input and expected output: typing @ requests file suggestions from the provider and renders README.md. // Edge case: slash-command completion is outside this provider because ask questions need only file references. // Dependencies: this test uses fake custom UI, fake autocomplete provider, and fake model completion. await withIsolatedAgentDir(async () => { let providerCwd: string | undefined; let providerFdPath: string | null | undefined; let suggestionsRequested = false; const autocompleteProvider = createAutocompleteProviderFake(() => { suggestionsRequested = true; }, Promise.resolve()); const model = createModel("openai", "gpt-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const ctx = createContextFake( [model], "Question with @README.md", [], { ok: true, apiKey: "ask-llm-api-key", headers: { "x-ask-llm": "enabled" }, }, true, true, false, ); askLlm(pi, { completeSimple: completion.completeSimple, resolveFdPath: () => "/tmp/fd", createAutocompleteProvider: (cwd: string, fdPath: string | null) => { providerCwd = cwd; providerFdPath = fdPath; return autocompleteProvider; }, } as never); const commandPromise = getAskCommand(pi).handler(" ", ctx); const questionComponent = await waitForCustomComponent(ctx, 0); questionComponent.handleInput?.("@"); for ( let attempt = 0; attempt < 20 && !suggestionsRequested; attempt += 1 ) { await delay(10); } expect(providerCwd).toBe("/tmp/project"); expect(providerFdPath).toBe("/tmp/fd"); expect(suggestionsRequested).toBe(true); expect(questionComponent.render?.(80).join("\n")).toContain("README.md"); await questionComponent.handleInput?.("\x1b"); await questionComponent.handleInput?.("\r"); await commandPromise; }); }); test("does not call the model without interactive UI", async () => { // Purpose: /ask must avoid provider calls when the command cannot display editor, loader, or answer UI. 
// Input and expected output: non-interactive context exits before completion and session writes. // Edge case: inline arguments are present, so the UI guard is the only early-exit reason. // Dependencies: this test uses fake model completion and fake ExtensionAPI session-write methods. await withIsolatedAgentDir(async () => { const model = createModel("openai", "gpt-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const ctx = createContextFake( [model], "Question from editor", [], { ok: true, apiKey: "ask-llm-api-key", headers: { "x-ask-llm": "enabled" }, }, false, ); askLlm(pi, { completeSimple: completion.completeSimple }); await getAskCommand(pi).handler("Inline question", ctx); expect(completion.calls).toEqual([]); expect(pi.sessionWriteCalls).toEqual([]); }); }); test("escapes XML delimiters inside tagged user questions", async () => { // Purpose: user question tags must keep the question boundary unambiguous when the question contains XML-like text. // Input and expected output: angle brackets and ampersands are escaped inside the user_question block. // Edge case: a literal closing tag in the question must not close the wrapper tag early. // Dependencies: this test uses fake model completion and inspects only the direct provider request. await withIsolatedAgentDir(async () => { const model = createModel("openai", "gpt-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const ctx = createContextFake([model]); askLlm(pi, { completeSimple: completion.completeSimple }); await getAskCommand(pi).handler("Use & ", ctx); expect(completion.calls[0]?.context.messages.at(-1)?.content).toBe( [ USER_QUESTION_OPEN_TAG, "Use <tag> & </user_question>", USER_QUESTION_CLOSE_TAG, ].join("\n"), ); }); }); test("uses configured model, thinking, and custom system prompt", async () => { // Purpose: ask-llm config must control the direct model call without using current-session prompt text. 
// Input and expected output: configured provider/model and prompt file override current model and bundled prompt. // Edge case: current model remains available but must not be selected when config model.id is present. // Dependencies: this test uses temp config, temp prompt file, fake model registry, and fake completion. await withIsolatedAgentDir(async (agentDir) => { const promptFile = join(agentDir, "custom-system.md"); await writeFile(promptFile, "Custom system prompt"); await writeConfig(agentDir, { enabled: true, model: { id: "anthropic/claude-test", thinking: "high" }, systemPromptFile: promptFile, }); const currentModel = createModel("openai", "gpt-test"); const configuredModel = createModel("anthropic", "claude-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const ctx = createContextFake([currentModel, configuredModel]); askLlm(pi, { completeSimple: completion.completeSimple }); await getAskCommand(pi).handler("Use configured runtime", ctx); expect(completion.calls[0]?.model).toBe(configuredModel); expect(completion.calls[0]?.context.systemPrompt).toBe( "Custom system prompt", ); expect(completion.calls[0]?.options?.reasoning).toBe("high"); }); }); test("includes loaded project context files in ask-llm system prompt", async () => { // Purpose: ask-llm must preserve Pi-loaded project rules for the one-off model request. // Input and expected output: contextFiles with AGENTS.md and CLAUDE.md are appended to the ask-llm system prompt. // Edge case: project context comes from before_agent_start state, not from the conversation branch. // Dependencies: fake before_agent_start event, fake model registry, and fake completion function. 
await withIsolatedAgentDir(async () => { const model = createModel("openai", "gpt-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const ctx = createContextFake([model]); askLlm(pi, { completeSimple: completion.completeSimple }); emitBeforeAgentStartHandlers(pi, { systemPromptOptions: { contextFiles: [ { path: "/tmp/project/AGENTS.md", content: "Project rule: use the project validation scripts.", }, { path: "/tmp/project/CLAUDE.md", content: "Project rule: keep docs current.", }, ], }, }); await getAskCommand(pi).handler("Use project context", ctx); expect(completion.calls).toHaveLength(1); expect(completion.calls[0]?.context.systemPrompt).toContain( "# Project Context", ); expect(completion.calls[0]?.context.systemPrompt).toContain( "## /tmp/project/AGENTS.md", ); expect(completion.calls[0]?.context.systemPrompt).toContain( "Project rule: use the project validation scripts.", ); expect(completion.calls[0]?.context.systemPrompt).toContain( "## /tmp/project/CLAUDE.md", ); expect(completion.calls[0]?.context.systemPrompt).toContain( "Project rule: keep docs current.", ); }); }); test("replays persisted context projection state before calling ask-llm", async () => { // Purpose: ask-llm input must match the projected task state when context-projection has recorded omitted tool results. // Input and expected output: valid projection config plus persisted state replaces old tool output with the recorded placeholder. // Edge case: the one-off ask question is appended after projection replay. // Dependencies: temp context-projection config, fake model registry, fake completion function, and fake session entries. 
await withIsolatedAgentDir(async (agentDir) => { await writeProjectionConfig(agentDir, { enabled: true }); const placeholder = "[projected old output]"; const model = createModel("openai", "gpt-test"); const completion = createCompletionFake(); const pi = createExtensionApiFake(); const entries = [ createSessionMessageEntry("1", null, "hello"), createMessageEntry( "2", "1", createAssistantToolCallMessage("old-tool"), ), createMessageEntry( "3", "2", createToolResultMessage("old-tool", "old full tool output"), ), createProjectionStateEntry("4", "3", placeholder, "3"), ]; const ctx = createContextFake([model], "Question from editor", entries); askLlm(pi, { completeSimple: completion.completeSimple }); await getAskCommand(pi).handler("Should we proceed?", ctx); expect(completion.calls).toHaveLength(1); const askMessages = JSON.stringify(completion.calls[0]?.context.messages); expect(askMessages).toContain(placeholder); expect(askMessages).not.toContain("old full tool output"); expect(completion.calls[0]?.context.messages.at(-1)?.content).toBe( [ USER_QUESTION_OPEN_TAG, "Should we proceed?", USER_QUESTION_CLOSE_TAG, ].join("\n"), ); }); }); test("shows the pending request loader inside the ask dialog frame", async () => { // Purpose: the loading state must keep the same centered dialog frame used by ask question and answer views. // Input and expected output: an inline question starts a pending model call, and the loader render contains the ask dialog frame. // Edge case: the request is still pending, so no answer dialog has replaced the loader. // Dependencies: this test uses fake custom UI and a manually released model completion. 
await withIsolatedAgentDir(async () => { let releaseCompletion: ((message: AssistantMessage) => void) | undefined; const completionPromise = new Promise((resolve) => { releaseCompletion = resolve; }); const model = createModel("openai", "gpt-test"); const completionCalls: CompletionCall[] = []; const pi = createExtensionApiFake(); const ctx = createContextFake([model]); askLlm(pi, { completeSimple: (calledModel, context, options) => { completionCalls.push({ model: calledModel as Model, context, options, }); return completionPromise; }, }); const commandPromise = getAskCommand(pi).handler("Wait for this", ctx); const loaderComponent = await waitForCustomComponent(ctx, 0); const loaderRender = loaderComponent.render?.(60).join("\n") ?? ""; expect(completionCalls).toHaveLength(1); expect(loaderRender).toContain("┏"); expect(loaderRender).toContain("┃"); expect(loaderRender).toContain("Asking LLM"); expect(loaderRender).toContain("Esc/Ctrl+C: cancel"); releaseCompletion?.( createAssistantResponse(model, { kind: "response", content: [{ type: "text", text: "Released answer" }], }), ); await commandPromise; }); }); test("shows a scrollable centered result dialog with the question and answer", async () => { // Purpose: the result dialog must show both sides of the ask exchange without overflowing the overlay height. // Input and expected output: a long answer renders within the terminal row budget and can scroll to later content. // Edge case: the question and answer are longer than the dialog can show at once. // Dependencies: this test uses fake model completion and fake custom UI input. 
await withIsolatedAgentDir(async () => { const model = createModel("openai", "gpt-test"); const longAnswer = Array.from( { length: 20 }, (_, index) => `answer line ${index + 1}`, ).join("\n"); const completion = createCompletionFake(longAnswer); const pi = createExtensionApiFake(); const ctx = createContextFake( [model], "Question from editor", [], { ok: true, apiKey: "ask-llm-api-key", headers: { "x-ask-llm": "enabled" }, }, true, false, ); askLlm(pi, { completeSimple: completion.completeSimple }); const commandPromise = getAskCommand(pi).handler( "Explain this long question with enough detail to require scrolling.", ctx, ); const answerComponent = await waitForCustomComponent(ctx, 1); const firstRender = answerComponent.render?.(50) ?? []; expect(ctx.customOptions).toHaveLength(2); for (const options of ctx.customOptions) { expect(options).toMatchObject({ overlay: true, overlayOptions: { anchor: "center" }, }); } expect(firstRender.length).toBeLessThanOrEqual(12); expect(firstRender.join("\n")).toContain("Explain this long question"); expect(firstRender.join("\n")).toContain("answer line 1"); expect(firstRender.join("\n")).not.toContain("answer line 20"); await answerComponent.handleInput?.("\x1b[F"); const lastRender = answerComponent.render?.(50).join("\n") ?? ""; expect(lastRender).toContain("answer line 20"); await answerComponent.handleInput?.("\r"); await commandPromise; }); }); test("copies the rendered answer with Ctrl+Y without closing", async () => { // Purpose: the focused answer UI must let users copy the exact model answer without closing the dialog. // Input and expected output: Ctrl+Y copies the answer once, and Enter closes the already rendered answer view. // Edge case: copy is independent from close keys, so the command promise remains pending after Ctrl+Y. // Dependencies: this test uses fake model completion, fake clipboard dependency, and fake custom UI input. 
await withIsolatedAgentDir(async () => {
  const gptModel = createModel("openai", "gpt-test");
  const completionFake = createCompletionFake("Copyable answer");
  // In-memory clipboard: every copied text is appended here.
  const copiedTexts: string[] = [];
  const extensionApi = createExtensionApiFake();
  const commandCtx = createContextFake(
    [gptModel],
    "Question from editor",
    [],
    {
      ok: true,
      apiKey: "ask-llm-api-key",
      headers: { "x-ask-llm": "enabled" },
    },
    true,
    false,
  );
  askLlm(extensionApi, {
    completeSimple: completionFake.completeSimple,
    copyToClipboard: async (text: string) => {
      copiedTexts.push(text);
    },
  });

  // Track settlement of the command so the test can prove it is still pending.
  let commandResolved = false;
  const markResolved = (): void => {
    commandResolved = true;
  };
  const pendingCommand = getAskCommand(extensionApi)
    .handler("Copy this", commandCtx)
    .then(markResolved);

  const answerDialog = await waitForCustomComponent(commandCtx, 1);
  const dialogText = answerDialog.render?.(100).join("\n");
  expect(dialogText).toContain("Ctrl+Y");

  // "\x19" is the Ctrl+Y control character; one extra microtask turn follows
  // before the clipboard and notification assertions.
  await answerDialog.handleInput?.("\x19");
  await Promise.resolve();

  const expectedNotifications = [
    { message: "Answer copied to clipboard", type: "info" },
  ];
  expect(copiedTexts).toEqual(["Copyable answer"]);
  expect(commandCtx.notifications).toEqual(expectedNotifications);
  expect(commandResolved).toBe(false);

  // Enter closes the dialog, which lets the command promise settle.
  await answerDialog.handleInput?.("\r");
  await pendingCommand;
  expect(commandResolved).toBe(true);
});
});

test("reports clipboard copy failures without closing the answer", async () => {
  // Purpose: clipboard failures must be visible to the user and must not close the answer dialog.
  // Input and expected output: Ctrl+Y with a failing clipboard dependency reports one scoped warning.
  // Edge case: the answer remains open after copy failure and still closes on Enter.
  // Dependencies: this test uses fake model completion, fake clipboard dependency, and fake custom UI input.
await withIsolatedAgentDir(async () => {
  const gptModel = createModel("openai", "gpt-test");
  const completionFake = createCompletionFake("Copyable answer");
  const extensionApi = createExtensionApiFake();
  const commandCtx = createContextFake(
    [gptModel],
    "Question from editor",
    [],
    {
      ok: true,
      apiKey: "ask-llm-api-key",
      headers: { "x-ask-llm": "enabled" },
    },
    true,
    false,
  );
  askLlm(extensionApi, {
    completeSimple: completionFake.completeSimple,
    // Clipboard fake that always rejects.
    copyToClipboard: async () => {
      throw new Error("clipboard unavailable");
    },
  });

  // Track settlement of the command so the test can prove it is still pending.
  let commandResolved = false;
  const markResolved = (): void => {
    commandResolved = true;
  };
  const pendingCommand = getAskCommand(extensionApi)
    .handler("Copy this", commandCtx)
    .then(markResolved);

  const answerDialog = await waitForCustomComponent(commandCtx, 1);

  // "\x19" is the Ctrl+Y control character; one extra microtask turn follows
  // before the notification assertion.
  await answerDialog.handleInput?.("\x19");
  await Promise.resolve();

  const expectedNotifications = [
    {
      message:
        "[ask-llm] failed to copy answer to clipboard: clipboard unavailable",
      type: "warning",
    },
  ];
  expect(commandCtx.notifications).toEqual(expectedNotifications);
  expect(commandResolved).toBe(false);

  // Enter still closes the dialog after the failed copy.
  await answerDialog.handleInput?.("\r");
  await pendingCommand;
  expect(commandResolved).toBe(true);
});
});

test("reports invalid config without calling the model", async () => {
  // Purpose: invalid ask-llm config must fail inside ask-llm only and avoid unsafe provider calls.
  // Input and expected output: unsupported config keys produce one warning and no completion request.
  // Edge case: the command remains registered so the user can see the scoped config issue.
  // Dependencies: this test uses temp config, fake UI notifications, and fake completion observation.
await withIsolatedAgentDir(async (agentDir) => {
  // Config with a key the test title calls unsupported.
  const invalidConfig = { enabled: true, unsupported: true };
  await writeConfig(agentDir, invalidConfig);

  const gptModel = createModel("openai", "gpt-test");
  const completionFake = createCompletionFake();
  const extensionApi = createExtensionApiFake();
  const commandCtx = createContextFake([gptModel]);
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Will not call", commandCtx);

  const expectedNotifications = [
    {
      message: "[ask-llm] config contains unsupported keys",
      type: "warning",
    },
  ];
  expect(completionFake.calls).toEqual([]);
  expect(commandCtx.notifications).toEqual(expectedNotifications);
});
});

test("reports unreadable custom system prompt without calling the model", async () => {
  // Purpose: ask-llm must reject unreadable custom prompt files before a provider request.
  // Input and expected output: an absolute missing prompt path produces one scoped warning and no completion request.
  // Edge case: an absolute path reaches prompt loading instead of config validation.
  // Dependencies: this test uses temp config, fake UI notifications, and fake completion observation.
  await withIsolatedAgentDir(async (agentDir) => {
    // The file is never created, so reading it must fail.
    const missingPromptPath = join(agentDir, "missing-system.md");
    await writeConfig(agentDir, {
      enabled: true,
      systemPromptFile: missingPromptPath,
    });

    const gptModel = createModel("openai", "gpt-test");
    const completionFake = createCompletionFake();
    const extensionApi = createExtensionApiFake();
    const commandCtx = createContextFake([gptModel]);
    askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

    await getAskCommand(extensionApi).handler("Will not call", commandCtx);

    // Only the message prefix is pinned; the rest of the message is not asserted.
    const expectedNotifications = [
      {
        message: expect.stringContaining(
          "[ask-llm] failed to read system prompt:",
        ),
        type: "warning",
      },
    ];
    expect(completionFake.calls).toEqual([]);
    expect(commandCtx.notifications).toEqual(expectedNotifications);
  });
});

test("reports empty custom system prompt without calling the model", async () => {
  // Purpose: ask-llm must reject empty custom prompt files before a provider request.
  // Input and expected output: whitespace-only prompt text produces one scoped warning and no completion request.
// Edge case: whitespace is trimmed before the empty-prompt decision.
// Dependencies: this test uses temp config, temp prompt file, fake UI notifications, and fake completion observation.
await withIsolatedAgentDir(async (agentDir) => {
  // A prompt file that exists but holds only whitespace.
  const emptyPromptPath = join(agentDir, "empty-system.md");
  await writeFile(emptyPromptPath, " ");
  await writeConfig(agentDir, {
    enabled: true,
    systemPromptFile: emptyPromptPath,
  });

  const gptModel = createModel("openai", "gpt-test");
  const completionFake = createCompletionFake();
  const extensionApi = createExtensionApiFake();
  const commandCtx = createContextFake([gptModel]);
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Will not call", commandCtx);

  const expectedNotifications = [
    {
      message: "[ask-llm] system prompt must not be empty",
      type: "warning",
    },
  ];
  expect(completionFake.calls).toEqual([]);
  expect(commandCtx.notifications).toEqual(expectedNotifications);
});
});

test("reports missing configured model without calling the model", async () => {
  // Purpose: ask-llm must resolve configured provider/model IDs through the pi model registry before a provider request.
  // Input and expected output: an unknown configured model produces one scoped warning and no completion request.
  // Edge case: the current model exists but must not be used when config names another model.
  // Dependencies: this test uses temp config, fake model registry, fake UI notifications, and fake completion observation.
await withIsolatedAgentDir(async (agentDir) => {
  // The configured model id differs from the only model passed to the context fake.
  const configuredModel = { id: "anthropic/missing-model" };
  await writeConfig(agentDir, { enabled: true, model: configuredModel });

  const registeredModel = createModel("openai", "gpt-test");
  const completionFake = createCompletionFake();
  const extensionApi = createExtensionApiFake();
  const commandCtx = createContextFake([registeredModel]);
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Will not call", commandCtx);

  const expectedNotifications = [
    {
      message: "[ask-llm] model anthropic/missing-model was not found",
      type: "warning",
    },
  ];
  expect(completionFake.calls).toEqual([]);
  expect(commandCtx.notifications).toEqual(expectedNotifications);
});
});

test("rejects input that exceeds the model context window", async () => {
  // Purpose: ask-llm must fail before provider execution when the exact provider input is too large.
  // Input and expected output: a tiny model context window rejects a normal ask request without a completion call.
  // Edge case: rejection happens after model resolution but before provider execution.
  // Dependencies: this test uses fake model registry, fake completion function, and fake UI notifications.
  await withIsolatedAgentDir(async () => {
    // Same fake model, but with the context window overridden to 1.
    const tinyWindowModel = {
      ...createModel("openai", "gpt-test"),
      contextWindow: 1,
    };
    const completionFake = createCompletionFake();
    const extensionApi = createExtensionApiFake();
    const commandCtx = createContextFake([tinyWindowModel]);
    askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

    await getAskCommand(extensionApi).handler("Will not fit", commandCtx);

    const expectedNotifications = [
      {
        message: "[ask-llm] ask-llm input exceeds model context window",
        type: "warning",
      },
    ];
    expect(completionFake.calls).toEqual([]);
    expect(commandCtx.notifications).toEqual(expectedNotifications);
  });
});

test("retries retryable provider failures before showing the answer", async () => {
  // Purpose: ask-llm must retry transient provider failures inside command execution.
  // Input and expected output: first provider call throws a network error, second call renders the recovered answer.
// Edge case: zero retry delay keeps the test deterministic and proves maxRetries controls provider calls.
// Dependencies: temp config, fake model registry, and fake completeSimple sequence.
await withIsolatedAgentDir(async (agentDir) => {
  const retrySettings = { enabled: true, maxRetries: 2, baseDelayMs: 0 };
  await writeConfig(agentDir, { enabled: true, retry: retrySettings });

  // First outcome throws, second outcome carries the recovered answer text.
  const completionFake = createCompletionSequenceFake([
    { kind: "throw", error: new Error("network error: fetch failed") },
    {
      kind: "response",
      content: [{ type: "text", text: "answer after retry" }],
    },
  ]);
  const extensionApi = createExtensionApiFake();
  const gptModel = createModel("openai", "gpt-test");
  const commandCtx = createContextFake([gptModel]);
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Retry this", commandCtx);

  // Two provider calls: the failed attempt plus one retry.
  const renderedText = commandCtx.renderedCustomOutputs.join("\n");
  expect(completionFake.calls).toHaveLength(2);
  expect(renderedText).toContain("answer after retry");
});
});

test("retries retryable provider error responses before showing the answer", async () => {
  // Purpose: provider responses with stopReason error must use the same retry path as thrown transient errors.
  // Input and expected output: first response has retryable error metadata, second response renders visible text.
  // Edge case: completeSimple resolves successfully but the assistant response marks the provider call as failed.
  // Dependencies: temp config, fake model registry, and fake completeSimple sequence.
await withIsolatedAgentDir(async (agentDir) => {
  const retrySettings = { enabled: true, maxRetries: 2, baseDelayMs: 0 };
  await writeConfig(agentDir, { enabled: true, retry: retrySettings });

  // First outcome is a resolved response flagged as an error, second is a normal text answer.
  const completionFake = createCompletionSequenceFake([
    {
      kind: "response",
      content: [],
      stopReason: "error",
      errorMessage: "provider returned error 503",
    },
    {
      kind: "response",
      content: [{ type: "text", text: "answer after error retry" }],
    },
  ]);
  const extensionApi = createExtensionApiFake();
  const gptModel = createModel("openai", "gpt-test");
  const commandCtx = createContextFake([gptModel]);
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Retry this", commandCtx);

  // Two provider calls: the error response plus one retry.
  const renderedText = commandCtx.renderedCustomOutputs.join("\n");
  expect(completionFake.calls).toHaveLength(2);
  expect(renderedText).toContain("answer after error retry");
});
});

test("does not retry aborted ask-llm requests", async () => {
  // Purpose: cancellation must stop ask-llm retry instead of starting another provider call.
  // Input and expected output: completeSimple throws AbortError and ask-llm reports one scoped warning.
  // Edge case: retry config allows retries, so abort classification is the only reason no retry happens.
  // Dependencies: temp config, fake model registry, and fake completeSimple sequence.
await withIsolatedAgentDir(async (agentDir) => {
  const retrySettings = { enabled: true, maxRetries: 2, baseDelayMs: 0 };
  await writeConfig(agentDir, { enabled: true, retry: retrySettings });

  // A plain Error whose name is overwritten to "AbortError".
  const abortError = Object.assign(
    new Error("user aborted ask-llm request"),
    { name: "AbortError" },
  );
  const completionFake = createCompletionSequenceFake([
    { kind: "throw", error: abortError },
  ]);
  const extensionApi = createExtensionApiFake();
  const gptModel = createModel("openai", "gpt-test");
  const commandCtx = createContextFake([gptModel]);
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Abort this", commandCtx);

  // Exactly one provider call: no retry although the config allows two.
  const expectedNotifications = [
    {
      message:
        "[ask-llm] Ask LLM request failed: user aborted ask-llm request",
      type: "warning",
    },
  ];
  expect(completionFake.calls).toHaveLength(1);
  expect(commandCtx.notifications).toEqual(expectedNotifications);
});
});

test("rejects invalid ask-llm retry config", async () => {
  // Purpose: retry config is external JSON and must fail closed when numeric limits are invalid.
  // Input and expected output: negative maxRetries produces one scoped warning before provider calls.
  // Edge case: the config object uses only supported keys except the invalid retry field value.
  // Dependencies: temp config, fake model registry, fake completion function, and in-memory UI notifications.
  await withIsolatedAgentDir(async (agentDir) => {
    // maxRetries is negative; the other retry fields match the retry tests.
    const invalidRetrySettings = {
      enabled: true,
      maxRetries: -1,
      baseDelayMs: 0,
    };
    await writeConfig(agentDir, {
      enabled: true,
      retry: invalidRetrySettings,
    });

    const completionFake = createCompletionFake();
    const extensionApi = createExtensionApiFake();
    const gptModel = createModel("openai", "gpt-test");
    const commandCtx = createContextFake([gptModel]);
    askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

    await getAskCommand(extensionApi).handler("Will not call", commandCtx);

    const expectedNotifications = [
      {
        message: "[ask-llm] retry.maxRetries must be a non-negative integer",
        type: "warning",
      },
    ];
    expect(completionFake.calls).toEqual([]);
    expect(commandCtx.notifications).toEqual(expectedNotifications);
  });
});

test("reports unavailable model auth without calling the model", async () => {
  // Purpose: ask-llm must stop before provider calls when pi cannot provide model auth.
// Input and expected output: model registry auth failure produces one scoped warning and no completion request.
// Edge case: auth failure happens after model resolution but before prompt execution.
// Dependencies: this test uses fake model auth, fake UI notifications, and fake completion observation.
await withIsolatedAgentDir(async () => {
  const gptModel = createModel("openai", "gpt-test");
  const completionFake = createCompletionFake();
  const extensionApi = createExtensionApiFake();
  // The fourth argument is a failed auth result carrying the error text.
  const commandCtx = createContextFake(
    [gptModel],
    "Question from editor",
    [],
    { ok: false, error: "missing token" },
  );
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Will not call", commandCtx);

  const expectedNotifications = [
    {
      message: "[ask-llm] model auth unavailable: missing token",
      type: "warning",
    },
  ];
  expect(completionFake.calls).toEqual([]);
  expect(commandCtx.notifications).toEqual(expectedNotifications);
});
});

test("reports provider error responses when retry is disabled", async () => {
  // Purpose: ask-llm must report provider-level error stop reasons when retry is disabled.
  // Input and expected output: a provider error response produces one scoped warning after one completion request.
  // Edge case: the provider error message is used when present.
  // Dependencies: this test uses temp config, fake model completion, and fake UI notifications.
await withIsolatedAgentDir(async (agentDir) => {
  // Retry is switched off in the config.
  const retrySettings = { enabled: false, maxRetries: 0, baseDelayMs: 0 };
  await writeConfig(agentDir, { enabled: true, retry: retrySettings });

  const gptModel = createModel("openai", "gpt-test");
  // The response carries text, but its stop reason marks it as an error.
  const completionFake = createCompletionFake("Ignored answer", {
    stopReason: "error",
    errorMessage: "provider rejected request",
  });
  const extensionApi = createExtensionApiFake();
  const commandCtx = createContextFake([gptModel]);
  askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

  await getAskCommand(extensionApi).handler("Call provider", commandCtx);

  const expectedNotifications = [
    {
      message: "[ask-llm] Ask LLM request failed: provider rejected request",
      type: "warning",
    },
  ];
  expect(completionFake.calls).toHaveLength(1);
  expect(commandCtx.notifications).toEqual(expectedNotifications);
});
});

test("reports empty text responses", async () => {
  // Purpose: ask-llm must not show a blank answer when the provider response has no visible text.
  // Input and expected output: whitespace-only answer text produces one scoped warning after one completion request.
  // Edge case: response text is trimmed before the empty-response decision.
  // Dependencies: this test uses fake model completion and fake UI notifications.
  await withIsolatedAgentDir(async () => {
    const gptModel = createModel("openai", "gpt-test");
    // The provider answers with whitespace only.
    const completionFake = createCompletionFake(" ");
    const extensionApi = createExtensionApiFake();
    const commandCtx = createContextFake([gptModel]);
    askLlm(extensionApi, { completeSimple: completionFake.completeSimple });

    await getAskCommand(extensionApi).handler("Call provider", commandCtx);

    const expectedNotifications = [
      {
        message: "[ask-llm] model response did not contain text",
        type: "warning",
      },
    ];
    expect(completionFake.calls).toHaveLength(1);
    expect(commandCtx.notifications).toEqual(expectedNotifications);
  });
});

test("bundled default system prompt documents the user question tag", async () => {
  // Purpose: ask-llm must keep default prompt text in the extension prompt directory.
  // Input and expected output: the bundled prompt file is readable and documents the tag used by request-building logic.
// Edge case: this test checks only prompt text that is part of the provider request contract.
// Dependencies: this test reads only the package-owned default prompt file.
// The prompt path is resolved relative to this test file's directory.
const bundledPromptPath = join(import.meta.dir, "prompts", "system.md");
const bundledPromptText = await readFile(bundledPromptPath, "utf8");

// Both the opening and the closing tag must appear in the prompt text.
for (const questionTag of [USER_QUESTION_OPEN_TAG, USER_QUESTION_CLOSE_TAG]) {
  expect(bundledPromptText).toContain(questionTag);
}
});
});