/* eslint-disable @typescript-eslint/no-explicit-any */ // eslint-disable-next-line @typescript-eslint/ban-ts-comment // @ts-nocheck import { wrapOpenAI, wrapAzureOpenAI } from '../../src/handlers/openai'; import { GalileoSingleton } from '../../src/singleton'; import { GalileoApiClient } from '../../src/api-client'; import { parseUsage } from '../../src/handlers/openai/usage'; // Mock dependencies jest.mock('../../src/singleton'); // Mock api-client to provide the static getTimestampRecord method // This needs to be done before the test runs so it's available when galileo-logger imports it // Return the expected date that matches what the test expects jest.mock('../../src/api-client', () => ({ GalileoApiClient: Object.assign( jest.fn().mockImplementation(() => ({})), { getTimestampRecord: jest .fn() .mockReturnValue(new Date('2024-01-01T00:00:00.000Z')) } ) })); describe('OpenAI Wrapper', () => { // Mock OpenAI client const mockResponse = { choices: [ { message: { content: 'Hello world!', role: 'assistant' } } ], usage: { prompt_tokens: 10, completion_tokens: 5 } }; const mockStreamingChunks = [ { choices: [ { delta: { role: 'assistant' } } ] }, { choices: [ { delta: { content: 'Hello ' } } ] }, { choices: [ { delta: { content: 'world!' } } ] } ]; const mockCreateMethod = jest.fn(); const mockOpenAI = { chat: { completions: { create: mockCreateMethod } } }; const mockLogger = { currentParent: jest.fn().mockReturnValue(undefined), startTrace: jest.fn((args: any) => { // Add createdAt if not provided, matching real logger behavior if (args && args.createdAt === undefined) { args.createdAt = GalileoApiClient.getTimestampRecord(); } }), addLlmSpan: jest.fn((args: any) => { // Add createdAt if not provided, matching real logger behavior if (args && args.createdAt === undefined) { args.createdAt = GalileoApiClient.getTimestampRecord(); } }), addToolSpan: jest.fn(), // Features 1, 2, 3: Support tool span creation conclude: jest.fn() }; const mockDate = new Date('2024-01-01T00:00:00.000Z'); beforeEach(() => { jest.clearAllMocks(); GalileoSingleton.getInstance = jest.fn().mockReturnValue({ getClient: jest.fn().mockReturnValue(mockLogger) }); jest.useFakeTimers(); jest.setSystemTime(mockDate); }); afterEach(() => { jest.useRealTimers(); mockLogger.startTrace = jest.fn(); }); test('should correctly wrap OpenAI and handle non-streaming requests', async () => { // Setup mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); mockCreateMethod.mockResolvedValueOnce(mockResponse); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Say hello world!' }] }; // Execute const result = await wrappedOpenAI.chat.completions.create(requestData); // Assert expect(mockCreateMethod).toHaveBeenCalledWith(requestData); expect(result).toEqual(mockResponse); expect(mockLogger.startTrace).toHaveBeenCalled(); const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; expect(startTraceCall.input).toBe( '[{"role":"user","content":"Say hello world!"}]' ); expect(startTraceCall.name).toBeUndefined(); expect(startTraceCall.output).toBeUndefined(); // Check createdAt separately since it's added by the mock if (startTraceCall.createdAt === undefined) { startTraceCall.createdAt = GalileoApiClient.getTimestampRecord(); } expect(startTraceCall.createdAt).toEqual(mockDate); expect(mockLogger.addLlmSpan).toHaveBeenCalledWith({ createdAt: mockDate, input: requestData.messages, output: [mockResponse.choices[0].message], model: 'gpt-4o', numInputTokens: 10, numOutputTokens: 5, totalTokens: 15, numReasoningTokens: 0, numCachedInputTokens: 0, durationNs: 1_000_000, metadata: {}, statusCode: 200, temperature: undefined, tools: undefined }); expect(mockLogger.conclude).toHaveBeenCalledWith({ output: JSON.stringify([mockResponse.choices[0].message]), durationNs: 1000000 }); }); test('should handle streaming responses correctly', async () => { // Create async iterable for streaming response const mockStream = { [Symbol.asyncIterator]: () => { let index = 0; return { next: async () => { if (index < mockStreamingChunks.length) { jest.advanceTimersByTime(1); return { done: false, value: mockStreamingChunks[index++] }; } return { done: true, value: undefined }; } }; } }; mockCreateMethod.mockResolvedValueOnce(mockStream); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Say hello world!' }], stream: true }; // Execute const stream = await wrappedOpenAI.chat.completions.create(requestData); const chunks = []; for await (const chunk of stream) { chunks.push(chunk); } // Assert expect(mockCreateMethod).toHaveBeenCalledWith(requestData); expect(chunks).toEqual(mockStreamingChunks); expect(mockLogger.startTrace).toHaveBeenCalled(); const startTraceCall = mockLogger.startTrace.mock.calls[0][0]; expect(startTraceCall.input).toBe( '[{"role":"user","content":"Say hello world!"}]' ); expect(startTraceCall.name).toBeUndefined(); expect(startTraceCall.output).toBeUndefined(); // Check createdAt separately since it's added by the mock if (startTraceCall.createdAt === undefined) { startTraceCall.createdAt = GalileoApiClient.getTimestampRecord(); } expect(startTraceCall.createdAt).toEqual(mockDate); expect(mockLogger.addLlmSpan).toHaveBeenCalled(); const addLlmSpanCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(addLlmSpanCall.input).toEqual(requestData.messages); expect(addLlmSpanCall.output).toEqual({ content: 'Hello world!', role: 'assistant' }); expect(addLlmSpanCall.name).toBeUndefined(); expect(addLlmSpanCall.model).toBe('gpt-4o'); expect(addLlmSpanCall.numInputTokens).toBe(0); expect(addLlmSpanCall.numOutputTokens).toBe(0); expect(addLlmSpanCall.durationNs).toBe(2_000_000); expect(addLlmSpanCall.metadata).toEqual({}); expect(addLlmSpanCall.statusCode).toBe(200); // Check createdAt separately since it's added by the mock if (addLlmSpanCall.createdAt === undefined) { addLlmSpanCall.createdAt = GalileoApiClient.getTimestampRecord(); } // For streaming, createdAt might be slightly different due to timing expect(addLlmSpanCall.createdAt.getTime()).toBeGreaterThanOrEqual( mockDate.getTime() ); expect(mockLogger.conclude).toHaveBeenCalledWith({ output: JSON.stringify({ content: 'Hello world!', role: 'assistant' }), durationNs: 3_000_000 }); }); test('should handle tool calls in streaming responses', async () => { // Setup mock Date.now const times = [1000, 1100, 1200, 1300]; Date.now = jest .fn() .mockReturnValueOnce(times[0]) .mockReturnValueOnce(times[1]) .mockReturnValueOnce(times[2]) .mockReturnValueOnce(times[3]); // Create streaming chunks with tool calls const mockToolCallChunks = [ { choices: [ { delta: { role: 'assistant' } } ] }, { choices: [ { delta: { tool_calls: [ { index: 0, id: 'tool_1', function: { name: 'get_weather' } } ] } } ] }, { choices: [ { delta: { tool_calls: [ { index: 0, function: { arguments: '{"location": "San ' } } ] } } ] }, { choices: [ { delta: { tool_calls: [ { index: 0, function: { arguments: 'Francisco"}' } } ] } } ] } ]; const toolCallStream = { [Symbol.asyncIterator]: () => { let index = 0; return { next: async () => { if (index < mockToolCallChunks.length) { return { done: false, value: mockToolCallChunks[index++] }; } return { done: true, value: undefined }; } }; } }; mockCreateMethod.mockResolvedValueOnce(toolCallStream); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: "What's the weather?" }], stream: true }; // Execute const stream = await wrappedOpenAI.chat.completions.create(requestData); const chunks = []; for await (const chunk of stream) { chunks.push(chunk); } // Assert expect(mockCreateMethod).toHaveBeenCalledWith(requestData); expect(chunks).toEqual(mockToolCallChunks); // Check the expected output format with tool calls expect(mockLogger.addLlmSpan).toHaveBeenCalledWith( expect.objectContaining({ output: { content: '', role: 'assistant', tool_calls: [ { id: 'tool_1', function: { name: 'get_weather', arguments: '{"location": "San Francisco"}' } } ] } }) ); }); test('should handle function calls (legacy format) in streaming responses', async () => { // Setup mock Date.now Date.now = jest.fn().mockReturnValue(1000); // Create streaming chunks with function call (legacy format) const mockFunctionCallChunks = [ { choices: [ { delta: { role: 'assistant', function_call: { name: 'get_weather' } } } ] }, { choices: [ { delta: { function_call: { arguments: '{"location": "New ' } } } ] }, { choices: [ { delta: { function_call: { arguments: 'York"}' } } } ] } ]; const functionCallStream = { [Symbol.asyncIterator]: () => { let index = 0; return { next: async () => { if (index < mockFunctionCallChunks.length) { return { done: false, value: mockFunctionCallChunks[index++] }; } return { done: true, value: undefined }; } }; } }; mockCreateMethod.mockResolvedValueOnce(functionCallStream); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: "What's the weather in NY?" }], stream: true }; // Execute const stream = await wrappedOpenAI.chat.completions.create(requestData); const chunks = []; for await (const chunk of stream) { chunks.push(chunk); } // Assert expected output format with legacy function call expect(mockLogger.addLlmSpan).toHaveBeenCalledWith( expect.objectContaining({ output: { content: '', role: 'assistant', tool_calls: [ { id: 'function_call_0', function: { name: 'get_weather', arguments: '{"location": "New York"}' } } ] } }) ); }); test('should handle errors in non-streaming requests', async () => { // Setup mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); const error = new Error('API Error'); mockCreateMethod.mockRejectedValueOnce(error); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Say hello world!' }] }; // Execute and assert await expect( wrappedOpenAI.chat.completions.create(requestData) ).rejects.toThrow('API Error'); expect(mockLogger.startTrace).toHaveBeenCalled(); expect(mockLogger.addLlmSpan).toHaveBeenCalledWith( expect.objectContaining({ input: requestData.messages, output: { content: 'Error: API Error' }, statusCode: 500, numInputTokens: 0, numOutputTokens: 0 }) ); expect(mockLogger.addLlmSpan.mock.calls[0][0].name).toBeUndefined(); expect(mockLogger.conclude).toHaveBeenCalledWith({ output: 'Error: API Error', durationNs: 1_000_000 }); }); test('should handle errors in streaming responses', async () => { // Setup mock stream that throws an error const errorStream = { [Symbol.asyncIterator]: () => { return { next: async () => { throw new Error('Stream Error'); } }; } }; mockCreateMethod.mockResolvedValueOnce(errorStream); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Say hello world!' }], stream: true }; // Execute const stream = await wrappedOpenAI.chat.completions.create(requestData); // Assert that using the stream throws the expected error await expect(async () => { // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const _ of stream) { // This should throw } }).rejects.toThrow('Stream Error'); }); test('should use existing trace if there is a parent trace', async () => { // Setup with existing parent trace mockLogger.currentParent.mockReturnValueOnce('existing-trace-id'); mockCreateMethod.mockResolvedValueOnce(mockResponse); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Say hello world!' }] }; // Execute await wrappedOpenAI.chat.completions.create(requestData); // Assert that startTrace was not called (because there's already a parent) expect(mockLogger.startTrace).not.toHaveBeenCalled(); expect(mockLogger.addLlmSpan).toHaveBeenCalled(); // conclude shouldn't be called since we didn't start a trace expect(mockLogger.conclude).not.toHaveBeenCalled(); }); test('should handle metadata in the request', async () => { // Setup: request may include a metadata field (e.g. for distillation or custom tracking). // The wrapper does not forward request.metadata to the span; span metadata is from extracted params only. mockCreateMethod.mockResolvedValueOnce(mockResponse); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Say hello world!' }], metadata: { requestId: '123', userId: 'user-456' } }; // Execute await wrappedOpenAI.chat.completions.create(requestData); // Assert span is created and request metadata does not override extracted span metadata expect(mockLogger.addLlmSpan).toHaveBeenCalledWith( expect.objectContaining({ metadata: expect.any(Object), model: 'gpt-4o' }) ); const addLlmSpanCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(addLlmSpanCall.metadata).toEqual({}); expect(addLlmSpanCall.name).toBeUndefined(); }); describe('Streaming Responses API', () => { test('should handle Responses API streaming format', async () => { // Setup mock Date.now const times = [1000, 1100, 1200]; Date.now = jest .fn() .mockReturnValueOnce(times[0]) .mockReturnValueOnce(times[1]) .mockReturnValueOnce(times[2]); // Create Responses API streaming chunks const mockResponsesApiChunks = [ { output: [ { type: 'message', content: 'Hello' } ] }, { output: [ { type: 'message', content: ' world!' } ] } ]; // Create async iterable from chunks async function* generateChunks() { for (const chunk of mockResponsesApiChunks) { yield chunk; } } mockCreateMethod.mockReturnValueOnce(generateChunks()); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', input: [{ type: 'message', content: 'Say hello', role: 'user' }], stream: true }; // Execute streaming const stream = await wrappedOpenAI.chat.completions.create(requestData); const chunks = []; for await (const chunk of stream) { chunks.push(chunk); } // Verify chunks were streamed correctly expect(chunks).toEqual(mockResponsesApiChunks); // Verify processOutputItems was called (via addLlmSpan) expect(mockLogger.addLlmSpan).toHaveBeenCalled(); const addLlmSpanCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(addLlmSpanCall.name).toBeUndefined(); }); test('should accumulate output items from multiple chunks', async () => { const times = [1000, 1100, 1200, 1300]; Date.now = jest .fn() .mockReturnValueOnce(times[0]) .mockReturnValueOnce(times[1]) .mockReturnValueOnce(times[2]) .mockReturnValueOnce(times[3]); const mockChunks = [ { output: [ { type: 'reasoning', summary: [{ text: 'Thinking...' }] } ] }, { output: [ { type: 'message', content: 'The answer is 42' } ] }, { output: [ { type: 'function_call', call_id: 'call_123', name: 'calculate', arguments: '{"x": 10}' } ] } ]; async function* generateChunks() { for (const chunk of mockChunks) { yield chunk; } } mockCreateMethod.mockReturnValueOnce(generateChunks()); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', input: [{ type: 'message', content: 'Calculate', role: 'user' }], stream: true }; const stream = await wrappedOpenAI.chat.completions.create(requestData); const chunks = []; for await (const chunk of stream) { chunks.push(chunk); } expect(chunks.length).toBe(3); expect(mockLogger.addLlmSpan).toHaveBeenCalled(); }); test('should call processFunctionCallOutputs for input items during streaming', async () => { const times = [1000, 1100]; Date.now = jest .fn() .mockReturnValueOnce(times[0]) .mockReturnValueOnce(times[1]); const mockChunks = [ { output: [ { type: 'message', content: 'Done' } ] } ]; async function* generateChunks() { for (const chunk of mockChunks) { yield chunk; } } mockCreateMethod.mockReturnValueOnce(generateChunks()); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', input: [ { type: 'function_call', call_id: 'call_prev', name: 'get_weather', arguments: '{}' }, { type: 'function_call_output', call_id: 'call_prev', output: { temp: 70 } }, { type: 'message', content: 'What is the weather?', role: 'user' } ], stream: true }; const stream = await wrappedOpenAI.chat.completions.create(requestData); // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const chunk of stream) { // Consume stream } // Verify addToolSpan was called for the function_call_output from input expect(mockLogger.addToolSpan).toHaveBeenCalled(); const toolSpanCall = mockLogger.addToolSpan.mock.calls[0][0]; expect(toolSpanCall.name).toBe('get_weather'); }); test('should not conclude trace when pending function calls exist', async () => { const times = [1000, 1100]; Date.now = jest .fn() .mockReturnValueOnce(times[0]) .mockReturnValueOnce(times[1]); const mockChunks = [ { output: [ { type: 'function_call', call_id: 'call_pending', name: 'get_time', arguments: '{}' } ] } ]; async function* generateChunks() { for (const chunk of mockChunks) { yield chunk; } } mockCreateMethod.mockReturnValueOnce(generateChunks()); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', input: [{ type: 'message', content: 'Get time', role: 'user' }], stream: true }; const stream = await wrappedOpenAI.chat.completions.create(requestData); // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const chunk of stream) { // Consume stream } // Trace should NOT be concluded because function_call is pending expect(mockLogger.conclude).not.toHaveBeenCalled(); }); test('should conclude trace when all function calls have outputs', async () => { const times = [1000, 1100, 1200]; Date.now = jest .fn() .mockReturnValueOnce(times[0]) .mockReturnValueOnce(times[1]) .mockReturnValueOnce(times[2]); const mockChunks = [ { output: [ { type: 'function_call', call_id: 'call_complete', name: 'get_weather', arguments: '{}' } ] }, { output: [ { type: 'function_call_output', call_id: 'call_complete', output: { temp: 72 } } ] }, { output: [ { type: 'message', content: 'The temperature is 72' } ] } ]; async function* generateChunks() { for (const chunk of mockChunks) { yield chunk; } } mockCreateMethod.mockReturnValueOnce(generateChunks()); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', input: [{ type: 'message', content: 'Get weather', role: 'user' }], stream: true }; const stream = await wrappedOpenAI.chat.completions.create(requestData); // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const chunk of stream) { // Consume stream } // Trace SHOULD be concluded because all function calls have outputs expect(mockLogger.conclude).toHaveBeenCalled(); }); test('should handle tool span creation from streamed output items', async () => { const times = [1000, 1100, 1200]; Date.now = jest .fn() .mockReturnValueOnce(times[0]) .mockReturnValueOnce(times[1]) .mockReturnValueOnce(times[2]); const mockChunks = [ { output: [ { type: 'code_interpreter_call', id: 'code_1', name: 'python_exec', code: 'print("hello")', outputs: [{ type: 'logs', logs: 'hello' }] } ] }, { output: [ { type: 'message', content: 'Code executed' } ] } ]; async function* generateChunks() { for (const chunk of mockChunks) { yield chunk; } } mockCreateMethod.mockReturnValueOnce(generateChunks()); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', input: [{ type: 'message', content: 'Run code', role: 'user' }], stream: true }; const stream = await wrappedOpenAI.chat.completions.create(requestData); // eslint-disable-next-line @typescript-eslint/no-unused-vars for await (const chunk of stream) { // Consume stream } // Verify tool span was created for code_interpreter_call expect(mockLogger.addToolSpan).toHaveBeenCalled(); const toolSpanCalls = mockLogger.addToolSpan.mock.calls; // Tool span name comes from the item.name field, not the type const codeInterpreterCall = toolSpanCalls.find( (call: any) => call[0].name === 'python_exec' || call[0].metadata?.tool_type === 'code_interpreter_call' ); expect(codeInterpreterCall).toBeDefined(); }); }); describe('Non-streaming Responses API', () => { test('should handle non-streaming Responses API request', async () => { const mockResponsesCreateMethod = jest.fn(); const mockOpenAIWithResponses = { chat: { completions: { create: jest.fn() } }, responses: { create: mockResponsesCreateMethod } }; const mockResponsesResponse = { output: [ { type: 'message', content: 'The weather is sunny' } ], model: 'gpt-4o', usage: { input_tokens: 15, output_tokens: 8 } }; mockResponsesCreateMethod.mockResolvedValueOnce(mockResponsesResponse); const wrappedOpenAI = wrapOpenAI( mockOpenAIWithResponses as any, mockLogger as any ); const requestData = { model: 'gpt-4o', input: [ { type: 'message', content: 'What is the weather?', role: 'user' } ], stream: false }; const response = await wrappedOpenAI.responses!.create(requestData); // Verify response is returned correctly expect(response).toEqual(mockResponsesResponse); // Verify trace was started expect(mockLogger.startTrace).toHaveBeenCalled(); // Verify LLM span was created with output items expect(mockLogger.addLlmSpan).toHaveBeenCalled(); const spanCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(spanCall.model).toBe('gpt-4o'); // Verify trace was concluded (no pending function calls) expect(mockLogger.conclude).toHaveBeenCalled(); }); test('should not conclude trace for Responses API when function calls are pending', async () => { const mockResponsesCreateMethod = jest.fn(); const mockOpenAIWithResponses = { chat: { completions: { create: jest.fn() } }, responses: { create: mockResponsesCreateMethod } }; const mockResponsesResponse = { output: [ { type: 'function_call', call_id: 'call_1', name: 'get_weather', arguments: '{"location": "New York"}' } ], model: 'gpt-4o', usage: { input_tokens: 15, output_tokens: 8 } }; mockResponsesCreateMethod.mockResolvedValueOnce(mockResponsesResponse); const wrappedOpenAI = wrapOpenAI( mockOpenAIWithResponses as any, mockLogger as any ); const requestData = { model: 'gpt-4o', input: [{ type: 'message', content: 'Get the weather', role: 'user' }], stream: false }; await wrappedOpenAI.responses!.create(requestData); // Verify LLM span was created expect(mockLogger.addLlmSpan).toHaveBeenCalled(); // Verify trace was NOT concluded because of pending function call expect(mockLogger.conclude).not.toHaveBeenCalled(); }); }); describe('Error Handling: Chat Completions vs Responses API Parity', () => { test('Chat Completions error logs Message[] input (not string)', async () => { // Setup mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); const error = new Error('Rate limit exceeded'); mockCreateMethod.mockRejectedValueOnce(error); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Test message' }] }; // Execute await expect( wrappedOpenAI.chat.completions.create(requestData) ).rejects.toThrow('Rate limit exceeded'); // Assert error span has Message[] input (matching success path) expect(mockLogger.addLlmSpan).toHaveBeenCalledWith( expect.objectContaining({ input: requestData.messages, // ← Message[] object (not string) output: { content: 'Error: Rate limit exceeded' }, statusCode: 500, // Default when error status not extracted numInputTokens: 0, numOutputTokens: 0 }) ); expect(mockLogger.addLlmSpan.mock.calls[0][0].name).toBeUndefined(); }); test('Responses API error logs Message[] input (not string)', async () => { // Setup const mockResponsesCreateMethod = jest.fn(); const mockOpenAIWithResponses = { chat: { completions: { create: jest.fn() } }, responses: { create: mockResponsesCreateMethod } }; mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); const error = new Error('Authentication failed'); mockResponsesCreateMethod.mockRejectedValueOnce(error); const wrappedOpenAI = wrapOpenAI( mockOpenAIWithResponses as any, mockLogger as any ); const requestData = { model: 'gpt-4o', input: [{ type: 'message', content: 'Test input', role: 'user' }] }; // Execute await expect( wrappedOpenAI.responses!.create(requestData) ).rejects.toThrow('Authentication failed'); // Assert error span has Message[] input (matching success path) const addLlmSpanCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(addLlmSpanCall).toMatchObject({ output: { content: 'Error: Authentication failed' }, statusCode: 500, // Default when error status not extracted numInputTokens: 0, numOutputTokens: 0 }); expect(addLlmSpanCall.name).toBeUndefined(); // Verify input is Message[] (converted from input) expect(Array.isArray(addLlmSpanCall.input)).toBe(true); expect(addLlmSpanCall.input[0]).toHaveProperty('role'); expect(addLlmSpanCall.input[0]).toHaveProperty('content'); }); test('Error span input format matches success span for Chat Completions', async () => { // This test verifies the Python parity principle: // Success and error paths should use same input format const mockCreateMethod1 = jest.fn(); const mockCreateMethod2 = jest.fn(); const wrappedOpenAI1 = wrapOpenAI( { chat: { completions: { create: mockCreateMethod1 } } } as any, mockLogger as any ); const wrappedOpenAI2 = wrapOpenAI( { chat: { completions: { create: mockCreateMethod2 } } } as any, mockLogger as any ); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Hello' }] }; // Scenario 1: Success mockCreateMethod1.mockResolvedValueOnce({ choices: [{ message: { role: 'assistant', content: 'Hi' } }], usage: { prompt_tokens: 5, completion_tokens: 2 } }); mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); await wrappedOpenAI1.chat.completions.create(requestData); const successCall = mockLogger.addLlmSpan.mock.calls[0][0]; // Scenario 2: Error jest.clearAllMocks(); mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); mockCreateMethod2.mockRejectedValueOnce(new Error('Failed')); await expect( wrappedOpenAI2.chat.completions.create(requestData) ).rejects.toThrow(); const errorCall = mockLogger.addLlmSpan.mock.calls[0][0]; // Both should have same input format (Message[]) expect(successCall.input).toEqual(errorCall.input); expect(Array.isArray(successCall.input)).toBe(true); expect(Array.isArray(errorCall.input)).toBe(true); }); test('Error with null messages returns empty array (safe)', async () => { // Edge case: what if .messages is explicitly null? mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); const error = new Error('API Error'); mockCreateMethod.mockRejectedValueOnce(error); const wrappedOpenAI = wrapOpenAI(mockOpenAI as any, mockLogger as any); const requestData = { model: 'gpt-4o', messages: null as any // ← Edge case }; // Execute await expect( wrappedOpenAI.chat.completions.create(requestData) ).rejects.toThrow('API Error'); // Verify error span was created with safe empty input const errorCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(errorCall.input).toEqual([]); // ← Safe fallback expect(Array.isArray(errorCall.input)).toBe(true); }); test('Error with undefined input returns empty array (safe)', async () => { // Edge case: what if input is completely missing? const mockResponsesCreateMethod = jest.fn(); const mockOpenAIWithResponses = { chat: { completions: { create: jest.fn() } }, responses: { create: mockResponsesCreateMethod } }; mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); const error = new Error('Error'); mockResponsesCreateMethod.mockRejectedValueOnce(error); const wrappedOpenAI = wrapOpenAI( mockOpenAIWithResponses as any, mockLogger as any ); const requestData = { model: 'gpt-4o' // ← No input field }; // Execute await expect( wrappedOpenAI.responses!.create(requestData) ).rejects.toThrow(); // Verify error span was created with safe empty input const errorCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(errorCall.input).toEqual([]); // ← Safe fallback expect(Array.isArray(errorCall.input)).toBe(true); }); test('Responses API error with function_call input converts to Message[]', async () => { // Verify complex input types are properly converted const mockResponsesCreateMethod = jest.fn(); const mockOpenAIWithResponses = { chat: { completions: { create: jest.fn() } }, responses: { create: mockResponsesCreateMethod } }; mockLogger.startTrace = jest.fn(() => { jest.advanceTimersByTime(1); }); const error = new Error('Processing error'); mockResponsesCreateMethod.mockRejectedValueOnce(error); const wrappedOpenAI = wrapOpenAI( mockOpenAIWithResponses as any, mockLogger as any ); const requestData = { model: 'gpt-4o', input: [ { type: 'message', content: 'Call the tool', role: 'user' }, { type: 'function_call', call_id: 'call_1', name: 'get_data', arguments: '{}' } ] }; // Execute await expect( wrappedOpenAI.responses!.create(requestData) ).rejects.toThrow(); // Verify error span input was converted to Message[] const errorCall = mockLogger.addLlmSpan.mock.calls[0][0]; expect(Array.isArray(errorCall.input)).toBe(true); expect(errorCall.input.length).toBe(2); expect(errorCall.input[0]).toHaveProperty('role'); expect(errorCall.input[0]).toHaveProperty('content'); expect(errorCall.input[1]).toHaveProperty('role', 'assistant'); // function_call converted }); }); describe('Azure OpenAI Wrapper', () => { test('wrapAzureOpenAI is alias for wrapOpenAI', () => { // Verify wrapAzureOpenAI exists and is a function expect(typeof wrapAzureOpenAI).toBe('function'); // Verify it's the same as wrapOpenAI expect(wrapAzureOpenAI).toBe(wrapOpenAI); }); test('wrapAzureOpenAI works the same as wrapOpenAI', async () => { mockCreateMethod.mockResolvedValueOnce(mockResponse); const wrappedAzureOpenAI = wrapAzureOpenAI( mockOpenAI as any, mockLogger as any ); const requestData = { model: 'gpt-4o', messages: [{ role: 'user', content: 'Say hello!' }] }; const response = await wrappedAzureOpenAI.chat.completions.create(requestData); // Verify response handling works expect(mockLogger.addLlmSpan).toHaveBeenCalled(); expect(mockLogger.conclude).toHaveBeenCalled(); expect(response).toEqual(mockResponse); }); }); }); describe('parseUsage', () => { test('returns zeros for null/undefined', () => { expect(parseUsage(null)).toEqual({ inputTokens: 0, outputTokens: 0, totalTokens: null, reasoningTokens: 0, cachedTokens: 0, rejectedPredictionTokens: 0 }); expect(parseUsage(undefined)).toEqual({ inputTokens: 0, outputTokens: 0, totalTokens: null, reasoningTokens: 0, cachedTokens: 0, rejectedPredictionTokens: 0 }); }); test('parses Chat Completions format (prompt_tokens/completion_tokens)', () => { const result = parseUsage({ prompt_tokens: 100, completion_tokens: 50, total_tokens: 150 }); expect(result.inputTokens).toBe(100); expect(result.outputTokens).toBe(50); expect(result.totalTokens).toBe(150); }); test('parses Responses API format (input_tokens/output_tokens)', () => { const result = parseUsage({ input_tokens: 80, output_tokens: 40, total_tokens: 120 }); expect(result.inputTokens).toBe(80); expect(result.outputTokens).toBe(40); expect(result.totalTokens).toBe(120); }); test('computes total_tokens when missing', () => { const result = parseUsage({ prompt_tokens: 10, completion_tokens: 5 }); expect(result.totalTokens).toBe(15); }); test('extracts reasoning_tokens from output_tokens_details', () => { const result = parseUsage({ input_tokens: 50, output_tokens: 100, total_tokens: 150, output_tokens_details: { reasoning_tokens: 30 } }); expect(result.reasoningTokens).toBe(30); }); test('extracts reasoning_tokens from completion_tokens_details (Chat Completions)', () => { const result = parseUsage({ prompt_tokens: 20, completion_tokens: 200, total_tokens: 220, completion_tokens_details: { reasoning_tokens: 150 } }); expect(result.reasoningTokens).toBe(150); }); test('extracts cached_tokens from input_tokens_details', () => { const result = parseUsage({ input_tokens: 100, output_tokens: 50, total_tokens: 150, input_tokens_details: { cached_tokens: 20 } }); expect(result.cachedTokens).toBe(20); }); test('extracts top-level reasoning_tokens and cached_tokens when present', () => { const result = parseUsage({ prompt_tokens: 50, completion_tokens: 80, reasoning_tokens: 25, cached_tokens: 10 }); expect(result.reasoningTokens).toBe(25); expect(result.cachedTokens).toBe(10); }); test('extracts rejected_prediction_tokens from output_tokens_details', () => { const result = parseUsage({ input_tokens: 50, output_tokens: 100, output_tokens_details: { rejected_prediction_tokens: 15 } }); expect(result.rejectedPredictionTokens).toBe(15); }); test('handles plain JavaScript objects from OpenAI SDK', () => { // OpenAI JavaScript SDK returns plain objects, not Pydantic models // This test verifies we correctly handle the actual SDK response format const result = parseUsage({ input_tokens: 60, output_tokens: 40, total_tokens: 100 }); expect(result.inputTokens).toBe(60); expect(result.outputTokens).toBe(40); expect(result.totalTokens).toBe(100); }); });