import { describe, expect, test } from "vitest"; import { Agent, createTool } from "./index.js"; import type { DataModelFromSchemaDefinition, ApiFromModules, ActionBuilder, MutationBuilder, } from "convex/server"; import { anyApi, actionGeneric, mutationGeneric } from "convex/server"; import { v } from "convex/values"; import { defineSchema } from "convex/server"; import { stepCountIs, type LanguageModelUsage } from "ai"; import { components, initConvexTest } from "./setup.test.js"; import { z } from "zod/v4"; import { mockModel } from "./mockModel.js"; import type { UsageHandler } from "./types.js"; const schema = defineSchema({}); type DataModel = DataModelFromSchemaDefinition; const action = actionGeneric as ActionBuilder; const mutation = mutationGeneric as MutationBuilder; // Tool that always requires approval const deleteFileTool = createTool({ description: "Delete a file", inputSchema: z.object({ filename: z.string() }), needsApproval: () => true, execute: async (_ctx, input) => `Deleted: ${input.filename}`, }); // Track usage handler calls to verify the full flow is exercised const usageCalls: LanguageModelUsage[] = []; const testUsageHandler: UsageHandler = async (_ctx, args) => { usageCalls.push(args.usage); }; function getApprovalIdFromSavedMessages( savedMessages: | Array<{ message?: { content: unknown }; }> | undefined, ): string { const approvalRequest = savedMessages ?.flatMap((savedMessage) => Array.isArray(savedMessage.message?.content) ? savedMessage.message.content : [], ) .find((part) => { const maybeApproval = part as { type?: unknown }; return maybeApproval.type === "tool-approval-request"; }) as { approvalId?: unknown } | undefined; if (typeof approvalRequest?.approvalId !== "string") { throw new Error("No approval request found in saved messages"); } return approvalRequest.approvalId; } // Second tool that also requires approval const renameFileTool = createTool({ description: "Rename a file", inputSchema: z.object({ oldName: z.string(), newName: z.string(), }), needsApproval: () => true, execute: async (_ctx, input) => `Renamed: ${input.oldName} → ${input.newName}`, }); // --- Agents (separate mock model instances to avoid shared callIndex) --- const approvalAgent = new Agent(components.agent, { name: "approval-test", instructions: "You delete files when asked.", tools: { deleteFile: deleteFileTool }, languageModel: mockModel({ contentSteps: [ // Step 1: model makes a tool call (LanguageModelV3 uses `input` as JSON string) [ { type: "tool-call", toolCallId: "tc-approve", toolName: "deleteFile", input: JSON.stringify({ filename: "test.txt" }), }, ], // Step 2: after tool execution, model responds with text [{ type: "text", text: "Done! I deleted test.txt." }], ], }), stopWhen: stepCountIs(5), usageHandler: testUsageHandler, }); const denialAgent = new Agent(components.agent, { name: "denial-test", instructions: "You delete files when asked.", tools: { deleteFile: deleteFileTool }, languageModel: mockModel({ contentSteps: [ [ { type: "tool-call", toolCallId: "tc-deny", toolName: "deleteFile", input: JSON.stringify({ filename: "secret.txt" }), }, ], [{ type: "text", text: "OK, I won't delete that file." }], ], }), stopWhen: stepCountIs(5), usageHandler: testUsageHandler, }); // --- Test helpers --- export const testApproveFlow = action({ args: {}, handler: async (ctx) => { const { thread } = await approvalAgent.createThread(ctx, { userId: "u1" }); // Step 1: Generate text — model returns tool call, SDK sees needsApproval → stops const result1 = await thread.generateText({ prompt: "Delete test.txt", }); const approvalId = getApprovalIdFromSavedMessages(result1.savedMessages); // Step 2: Approve the tool call const { messageId } = await ctx.runMutation( anyApi["approval.test"].submitApprovalForApprovalAgent, { threadId: thread.threadId, approvalId }, ); // Step 3: Continue generation — SDK executes tool, model responds const result2 = await thread.generateText({ promptMessageId: messageId, }); // Verify thread has all messages persisted const allMessages = await approvalAgent.listMessages(ctx, { threadId: thread.threadId, paginationOpts: { cursor: null, numItems: 20 }, }); return { approvalId, firstText: result1.text, secondText: result2.text, firstSavedCount: result1.savedMessages?.length ?? 0, secondSavedCount: result2.savedMessages?.length ?? 0, totalThreadMessages: allMessages.page.length, threadMessageRoles: allMessages.page.map((m) => m.message?.role), usageCallCount: usageCalls.length, // Verify usage data includes detail fields (AI SDK v6) lastUsage: usageCalls.at(-1), }; }, }); export const testDenyFlow = action({ args: {}, handler: async (ctx) => { const { thread } = await denialAgent.createThread(ctx, { userId: "u2" }); // Step 1: Generate — model returns tool call, approval requested const result1 = await thread.generateText({ prompt: "Delete secret.txt", }); const approvalId = getApprovalIdFromSavedMessages(result1.savedMessages); // Step 2: Deny the tool call const { messageId } = await ctx.runMutation( anyApi["approval.test"].submitDenialForDenialAgent, { threadId: thread.threadId, approvalId, reason: "This file is important", }, ); // Step 3: Continue generation — SDK creates execution-denied, model responds const result2 = await thread.generateText({ promptMessageId: messageId, }); // Verify thread state const allMessages = await denialAgent.listMessages(ctx, { threadId: thread.threadId, paginationOpts: { cursor: null, numItems: 20 }, }); return { approvalId, firstText: result1.text, secondText: result2.text, totalThreadMessages: allMessages.page.length, threadMessageRoles: allMessages.page.map((m) => m.message?.role), usageCallCount: usageCalls.length, lastUsage: usageCalls.at(-1), }; }, }); export const testApproveFlowWithInterveningMessage = action({ args: {}, handler: async (ctx) => { const { thread } = await approvalAgent.createThread(ctx, { userId: "u3" }); const result1 = await thread.generateText({ prompt: "Delete test.txt", }); const approvalId = getApprovalIdFromSavedMessages(result1.savedMessages); const approvalRequest = ( await approvalAgent.listMessages(ctx, { threadId: thread.threadId, paginationOpts: { cursor: null, numItems: 20 }, }) ).page.find((m) => { const content = m.message?.content; return ( Array.isArray(content) && content.some( (p) => p.type === "tool-approval-request" && p.approvalId === approvalId, ) ); }); if (!approvalRequest) { throw new Error("Approval request message not found"); } const intervening = await approvalAgent.saveMessage(ctx, { threadId: thread.threadId, prompt: "Intervening user message", skipEmbeddings: true, }); const { messageId } = await ctx.runMutation( anyApi["approval.test"].submitApprovalForApprovalAgent, { threadId: thread.threadId, approvalId }, ); const result2 = await thread.generateText({ promptMessageId: messageId, }); const allMessages = await approvalAgent.listMessages(ctx, { threadId: thread.threadId, paginationOpts: { cursor: null, numItems: 40 }, }); const approvalResponse = allMessages.page.find((m) => m._id === messageId); if (!approvalResponse) { throw new Error("Saved approval response message not found"); } return { secondText: result2.text, approvalResponseOrder: approvalResponse.order, approvalRequestId: approvalRequest._id, approvalRequestOrder: approvalRequest.order, interveningOrder: intervening.message.order, }; }, }); // Agent that calls two tools in one step, both needing approval const multiToolAgent = new Agent(components.agent, { name: "multi-tool-test", instructions: "You manage files.", tools: { deleteFile: deleteFileTool, renameFile: renameFileTool }, languageModel: mockModel({ contentSteps: [ // Step 1: model calls two tools at once [ { type: "tool-call", toolCallId: "tc-multi-1", toolName: "deleteFile", input: JSON.stringify({ filename: "old.txt" }), }, { type: "tool-call", toolCallId: "tc-multi-2", toolName: "renameFile", input: JSON.stringify({ oldName: "a.txt", newName: "b.txt" }), }, ], // Step 2: after both tools execute, model responds [{ type: "text", text: "Done! Deleted old.txt and renamed a.txt to b.txt." }], ], }), stopWhen: stepCountIs(5), usageHandler: testUsageHandler, }); export const testMultiToolApproveFlow = action({ args: {}, handler: async (ctx) => { const { thread } = await multiToolAgent.createThread(ctx, { userId: "u-multi", }); // Step 1: Generate — model calls two tools, both need approval const result1 = await thread.generateText({ prompt: "Delete old.txt and rename a.txt to b.txt", }); // Extract both approval IDs const approvalParts = result1.savedMessages ?.flatMap((m) => Array.isArray(m.message?.content) ? (m.message.content as unknown[]) : [], ) .filter( ( p, ): p is { type: "tool-approval-request"; approvalId: string; toolCallId: string; } => (p as { type?: string }).type === "tool-approval-request", ); if (!approvalParts || approvalParts.length !== 2) { throw new Error( `Expected 2 approval requests, got ${approvalParts?.length ?? 0}`, ); } // Approve both tool calls const { messageId: _msgId1 } = await ctx.runMutation( anyApi["approval.test"].submitApprovalForMultiToolAgent, { threadId: thread.threadId, approvalId: approvalParts[0].approvalId }, ); const { messageId: msgId2 } = await ctx.runMutation( anyApi["approval.test"].submitApprovalForMultiToolAgent, { threadId: thread.threadId, approvalId: approvalParts[1].approvalId }, ); // Continue generation with the last approval message const result2 = await thread.generateText({ promptMessageId: msgId2, }); const allMessages = await multiToolAgent.listMessages(ctx, { threadId: thread.threadId, paginationOpts: { cursor: null, numItems: 40 }, }); return { approvalCount: approvalParts.length, firstText: result1.text, secondText: result2.text, threadMessageRoles: allMessages.page.map((m) => m.message?.role), // Check that both approvals were merged into one tool message toolMessageCount: allMessages.page.filter( (m) => m.message?.role === "tool", ).length, }; }, }); export const submitApprovalForMultiToolAgent = mutation({ args: { threadId: v.string(), approvalId: v.string(), }, handler: async (ctx, { threadId, approvalId }) => { return multiToolAgent.approveToolCall(ctx, { threadId, approvalId }); }, }); export const submitApprovalForApprovalAgent = mutation({ args: { threadId: v.string(), approvalId: v.string(), reason: v.optional(v.string()) }, handler: async (ctx, { threadId, approvalId, reason }) => { return approvalAgent.approveToolCall(ctx, { threadId, approvalId, reason }); }, }); export const submitDenialForDenialAgent = mutation({ args: { threadId: v.string(), approvalId: v.string(), reason: v.optional(v.string()) }, handler: async (ctx, { threadId, approvalId, reason }) => { return denialAgent.denyToolCall(ctx, { threadId, approvalId, reason }); }, }); const testApi: ApiFromModules<{ fns: { testApproveFlow: typeof testApproveFlow; testDenyFlow: typeof testDenyFlow; testApproveFlowWithInterveningMessage: typeof testApproveFlowWithInterveningMessage; testMultiToolApproveFlow: typeof testMultiToolApproveFlow; submitApprovalForApprovalAgent: typeof submitApprovalForApprovalAgent; submitApprovalForMultiToolAgent: typeof submitApprovalForMultiToolAgent; submitDenialForDenialAgent: typeof submitDenialForDenialAgent; }; }>["fns"] = anyApi["approval.test"] as any; describe("Tool Approval Workflow", () => { test("approve: generate → approval request → approve → tool executes → final text", async () => { usageCalls.length = 0; const t = initConvexTest(schema); const result = await t.action(testApi.testApproveFlow, {}); expect(result.approvalId).toBeDefined(); // First call produces no text (just a tool call) expect(result.firstText).toBe(""); // Second call produces the final text expect(result.secondText).toBe("Done! I deleted test.txt."); // First call: user message + assistant (tool-call + approval-request) expect(result.firstSavedCount).toBeGreaterThanOrEqual(2); // Second call: tool-result + assistant text expect(result.secondSavedCount).toBeGreaterThanOrEqual(1); // Thread should have (ascending): user, assistant(tool-call+approval), // tool(approval-response), tool(tool-result), assistant(text) // listMessages returns descending order: expect(result.threadMessageRoles).toEqual([ "assistant", // final text "tool", // tool-result "tool", // approval-response "assistant", // tool-call + approval-request "user", // prompt ]); // Usage handler should be called for each generateText call expect(result.usageCallCount).toBeGreaterThanOrEqual(2); // Usage data should include AI SDK v6 detail fields expect(result.lastUsage).toBeDefined(); expect(result.lastUsage!.inputTokenDetails).toBeDefined(); expect(result.lastUsage!.outputTokenDetails).toBeDefined(); }); test("deny: generate → approval request → deny → model acknowledges denial", async () => { usageCalls.length = 0; const t = initConvexTest(schema); const result = await t.action(testApi.testDenyFlow, {}); expect(result.approvalId).toBeDefined(); expect(result.firstText).toBe(""); expect(result.secondText).toBe("OK, I won't delete that file."); // Same message ordering as approve flow: // user, assistant(tool-call+approval), tool(denial-response), // tool(execution-denied result), assistant(text) expect(result.threadMessageRoles).toEqual([ "assistant", "tool", "tool", "assistant", "user", ]); // Usage handler exercised expect(result.usageCallCount).toBeGreaterThanOrEqual(2); expect(result.lastUsage!.inputTokenDetails).toBeDefined(); expect(result.lastUsage!.outputTokenDetails).toBeDefined(); }); test("multi-tool: approve two tool calls from the same step", async () => { usageCalls.length = 0; const t = initConvexTest(schema); const result = await t.action(testApi.testMultiToolApproveFlow, {}); expect(result.approvalCount).toBe(2); expect(result.firstText).toBe(""); expect(result.secondText).toBe( "Done! Deleted old.txt and renamed a.txt to b.txt.", ); // Both approval responses should be merged into one tool message // (write-time merge in respondToToolCallApproval via findApprovalContext) // Thread: user, assistant(2 tool-calls + 2 approvals), // tool(2 approval-responses merged), tool(2 tool-results), assistant(text) expect(result.threadMessageRoles).toEqual([ "assistant", // final text "tool", // tool-results "tool", // approval-responses (merged) "assistant", // tool-calls + approval-requests "user", // prompt ]); }); test("approve remains valid with an intervening thread message", async () => { usageCalls.length = 0; const t = initConvexTest(schema); const result = await t.action(testApi.testApproveFlowWithInterveningMessage, {}); expect(result.secondText).toBe("Done! I deleted test.txt."); expect(result.approvalResponseOrder).toBe(result.approvalRequestOrder); expect(result.interveningOrder).toBeGreaterThan(result.approvalResponseOrder); }); });