/**
 * Unit tests for the experiments utilities (`getExperiment`, `getExperiments`,
 * `createExperiment`, `runExperiment`, `updateExperiment`, `deleteExperiment`,
 * `getExperimentColumns`) and for the `metricAggregates` / `aggregateMetrics` /
 * `getMetricAggregate` members asserted on returned experiments.
 *
 * The `GalileoApiClient` module is replaced with a Jest mock whose instance
 * methods are the `mock*` functions declared below, so no network calls are made.
 */
import {
  createExperiment,
  getExperiment,
  getExperiments,
  runExperiment
} from '../../src';
import {
  updateExperiment,
  deleteExperiment,
  getExperimentColumns
} from '../../src/utils/experiments';
import type {
  ExperimentResponseType,
  ExperimentUpdateRequest,
  RunExperimentParams
} from '../../src/types/experiment.types';
import type { Project } from '../../src/types/project.types';
import { ProjectTypes } from '../../src/types/project.types';
import { mockProject as commonMockProject } from '../common';
import {
  PromptTemplate,
  PromptTemplateVersion
} from '../../src/types/prompt-template.types';
import { Scorer, ScorerTypes } from '../../src/types/scorer.types';
import { DatasetDBType, DatasetRow } from '../../src/types/dataset.types';
import { GalileoMetrics } from '../../src/types/metrics.types';
import { Trace } from '../../src/types';

// Create mock implementation functions.
// One mock per API client method so each test can assert on / override it.
const mockInit = jest.fn().mockResolvedValue(undefined);
const mockGetExperiment = jest.fn();
const mockGetExperiments = jest.fn();
const mockCreateExperiment = jest.fn();
const mockUpdateExperiment = jest.fn();
const mockDeleteExperiment = jest.fn();
const mockGetProject = jest.fn();
const mockGetProjects = jest.fn();
const mockGetProjectByName = jest.fn();
const mockCreateRunScorerSettings = jest.fn();
const mockGetScorers = jest.fn();
const mockGetScorersPage = jest.fn();
const mockGetScorersPageByLabels = jest.fn();
const mockGetScorersPageByIds = jest.fn();
const mockCreatePromptRunJob = jest.fn();
const mockGetDataset = jest.fn();
const mockGetDatasets = jest.fn();
const mockGetDatasetByName = jest.fn();
const mockGetDatasetContent = jest.fn();
const mockIngestTraces = jest.fn();
const mockGetScorerVersion = jest.fn();
const mockGetGlobalProjectByName = jest.fn();
const mockListDatasetProjects = jest.fn();
const mockGetExperimentsAvailableColumns = jest.fn();

// Replace the API client: the constructor returns an object wired to the mocks
// above, and the static `getTimestampRecord` is attached via Object.assign.
jest.mock('../../src/api-client', () => {
  return {
    GalileoApiClient: Object.assign(
      jest.fn().mockImplementation(() => {
        return {
          init: mockInit,
          getExperiment: mockGetExperiment,
          getExperiments: mockGetExperiments,
          createExperiment: mockCreateExperiment,
          updateExperiment: mockUpdateExperiment,
          deleteExperiment: mockDeleteExperiment,
          getProject: mockGetProject,
          getProjects: mockGetProjects,
          getProjectByName: mockGetProjectByName,
          createRunScorerSettings: mockCreateRunScorerSettings,
          getScorers: mockGetScorers,
          getScorersPage: mockGetScorersPage,
          getScorersPageByLabels: mockGetScorersPageByLabels,
          getScorersPageByIds: mockGetScorersPageByIds,
          getScorerVersion: mockGetScorerVersion,
          createPromptRunJob: mockCreatePromptRunJob,
          getDataset: mockGetDataset,
          getDatasets: mockGetDatasets,
          getDatasetByName: mockGetDatasetByName,
          getDatasetContent: mockGetDatasetContent,
          ingestTraces: mockIngestTraces,
          getGlobalProjectByName: mockGetGlobalProjectByName,
          listDatasetProjects: mockListDatasetProjects,
          getExperimentsAvailableColumns: mockGetExperimentsAvailableColumns
        };
      }),
      {
        getTimestampRecord: jest.fn().mockReturnValue(new Date())
      }
    )
  };
});

const experimentId = 'exp-123';
const experimentName = 'My Test Experiment';
const projectId = 'proj-123';
const projectName = 'test-project';
const promptRunJobCreatedSuccessMessage = 'Prompt run job created';

const mockExperiment: ExperimentResponseType = {
  id: experimentId,
  name: experimentName,
  createdAt: '2023-01-01T00:00:00Z',
  updatedAt: '2023-01-01T00:00:00Z',
  projectId: 'proj-123',
  createdBy: 'user-123',
  taskType: 16
};

const mockExperiments: ExperimentResponseType[] = [mockExperiment];

// Example data
const mockProject: Project = {
  id: projectId,
  name: projectName,
  type: ProjectTypes.genAI,
  createdBy: 'user-123',
  createdByUser: {
    id: 'user-123',
    email: 'test@example.com',
    firstName: 'Test',
    lastName: 'User'
  },
  runs: [],
  createdAt: '2021-09-10T00:00:00Z',
  updatedAt: '2021-09-10T00:00:00Z'
};

const mockDataset: DatasetDBType = {
  id: 'test-dataset-id',
  name: 'test-dataset',
  columnNames: ['input'],
  projectCount: 1,
  createdAt: '2023-01-01T00:00:00Z',
  updatedAt: '2023-01-01T00:00:00Z',
  numRows: 1,
  createdByUser: null,
  currentVersionIndex: 1,
  draft: false
};

const mockDatasetRow: DatasetRow = {
  index: 0,
  rowId: 'row-123',
  values: [
    '{"country":"France"}',
    '{"value":"Paris"}',
    '{"iteration":"alpha"}'
  ],
  valuesDict: {
    input: '{"country":"France"}',
    output: '{"value":"Paris"}',
    metadata: '{"iteration":"alpha"}'
  },
  metadata: null
};

const mockPromptTemplateVersion: PromptTemplateVersion = {
  id: 'prompt-template-version-123',
  template: [
    { role: 'user', content: 'What is the capital of {{ country }}?' }
  ],
  version: 1,
  linesAdded: 0,
  linesRemoved: 0,
  linesEdited: 0,
  contentChanged: false,
  modelChanged: false,
  settingsChanged: false,
  settings: {},
  createdAt: '2023-01-01T00:00:00Z',
  updatedAt: '2023-01-01T00:00:00Z',
  createdByUser: {
    id: '8b198c08-ea7f-42d2-9e8d-d2b8bcb008b0',
    email: 'b@b.com'
  }
};

const mockPromptTemplate: PromptTemplate = {
  id: 'prompt-template-123',
  name: 'Test Prompt Template',
  template: 'What is the capital of {{ country }}?',
  selectedVersionId: 'prompt-template-version-123',
  selectedVersion: mockPromptTemplateVersion,
  allVersions: [mockPromptTemplateVersion],
  allAvailableVersions: [1],
  totalVersions: 1,
  maxVersion: 1,
  createdAt: '2023-01-01T00:00:00Z',
  updatedAt: '2023-01-01T00:00:00Z',
  createdByUser: {
    id: '8b198c08-ea7f-42d2-9e8d-d2b8bcb008b0',
    email: 'b@b.com'
  }
};

const mockScorer: Scorer = {
  id: 'scorer-123',
  name: 'correctness',
  label: 'Correctness',
  scorer_type: ScorerTypes.preset,
  tags: []
};

// Example data for tests
const EXAMPLE_EXPERIMENT: ExperimentResponseType = {
  id: 'a1b2c3d4-e5f6-7890-abcd-ef1234567890',
  name: 'Example Experiment',
  projectId: commonMockProject.id,
  createdAt: '2023-01-01T00:00:00.000000Z',
  updatedAt: '2023-01-01T00:00:00.000000Z',
  createdBy: 'user-123',
  taskType: 16
};

const EXAMPLE_EXPERIMENT_ID = EXAMPLE_EXPERIMENT.id;

describe('experiments utility', () => {
  // Snapshot of process.env taken before each test and restored afterwards.
  let originalEnv: Record<string, string | undefined>;

  beforeEach(() => {
    // Store original env variables
    originalEnv = { ...process.env };

    // Set required env variables
    process.env.GALILEO_PROJECT = 'test-project';
    process.env.GALILEO_LOG_STREAM = 'test-log-stream';

    // Clear all mocks before each test
    jest.clearAllMocks();

    // Reset mock implementations to default.
    // clearAllMocks() only clears recorded calls/results, so every mock that a
    // test may override with mockResolvedValue() is given its default again here.
    mockInit.mockResolvedValue(undefined);
    mockGetExperiment.mockResolvedValue(mockExperiment);
    mockGetExperiments.mockResolvedValue(mockExperiments);
    mockCreateExperiment.mockResolvedValue(mockExperiment);
    mockUpdateExperiment.mockResolvedValue(mockExperiment);
    mockDeleteExperiment.mockResolvedValue(undefined);
    mockGetProject.mockResolvedValue(mockProject);
    mockGetProjects.mockResolvedValue([mockProject]);
    mockGetProjectByName.mockResolvedValue(mockProject);
    mockCreateRunScorerSettings.mockResolvedValue(undefined);
    mockGetScorers.mockResolvedValue([mockScorer]);
    mockGetScorersPage.mockResolvedValue({
      scorers: [mockScorer],
      nextStartingToken: null
    });
    mockGetScorersPageByLabels.mockResolvedValue({
      scorers: [mockScorer],
      nextStartingToken: null
    });
    mockGetScorersPageByIds.mockResolvedValue({
      scorers: [],
      nextStartingToken: null
    });
    mockGetScorerVersion.mockResolvedValue({
      id: 'scorer-version-123',
      version: 1,
      scorer_id: 'scorer-123'
    });
    mockCreatePromptRunJob.mockResolvedValue({
      run_id: experimentId,
      project_id: mockProject.id,
      message: promptRunJobCreatedSuccessMessage
    });
    mockGetDataset.mockResolvedValue(mockDataset);
    mockGetDatasets.mockResolvedValue([mockDataset]);
    mockGetDatasetByName.mockResolvedValue(mockDataset);
    mockGetDatasetContent.mockResolvedValue([mockDatasetRow]);
    mockIngestTraces.mockResolvedValue(undefined);
    mockGetGlobalProjectByName.mockResolvedValue(mockProject);
    mockListDatasetProjects.mockResolvedValue({
      projects: [{ id: projectId }]
    });
    // Default to "no columns" so a value set by one test cannot leak into the next.
    mockGetExperimentsAvailableColumns.mockResolvedValue({ columns: [] });
  });

  afterEach(() => {
    // Restore original env variables
    process.env = originalEnv;
    jest.clearAllMocks();
  });

  describe('getExperiment', () => {
    it('should throw an error if neither id nor name is provided', async () => {
      await expect(getExperiment({ projectName })).rejects.toThrow(
        'To fetch an experiment with getExperiment, either id or name must be provided'
      );
    });

    it('should initialize the API client with the provided project name', async () => {
      // Call the function
      await getExperiment({ id: experimentId, projectName });

      // Verify init was called with the correct project name
      // The implementation passes projectId, projectName, and projectScoped
      expect(mockInit).toHaveBeenCalledWith({
        projectId: undefined,
        projectName,
        projectScoped: true
      });
    });

    it('should fetch experiment by ID when ID is provided', async () => {
      // Call the function
      const result = await getExperiment({ id: experimentId, projectName });

      // Verify the correct method was called with the right ID
      expect(mockGetExperiment).toHaveBeenCalledWith(experimentId);
      expect(result).toEqual(expect.objectContaining(mockExperiment));
    });

    it('should fetch experiments and find by name when name is provided', async () => {
      // Call the function
      const result = await getExperiment({
        name: experimentName,
        projectName
      });

      // Verify getExperiments was called and the result is correct
      expect(mockGetExperiments).toHaveBeenCalled();
      expect(mockGetExperiment).not.toHaveBeenCalled(); // Should not call getExperiment
      expect(result).toEqual(expect.objectContaining(mockExperiment));
    });

    it('should return undefined when searching by name and no matching experiment is found', async () => {
      // Return experiments without the one we're looking for
      mockGetExperiments.mockResolvedValueOnce([]);

      // Call the function with a non-existent name
      const result = await getExperiment({
        name: 'Non-existent Experiment',
        projectName
      });

      // Verify getExperiments was called and the result is undefined
      expect(mockGetExperiments).toHaveBeenCalled();
      expect(result).toBeUndefined();
    });

    it('should prioritize ID over name when both are provided', async () => {
      // Call the function with both ID and name
      const result = await getExperiment({
        id: experimentId,
        name: 'Different Name',
        projectName
      });

      // Verify only getExperiment was called with the ID
      expect(mockGetExperiment).toHaveBeenCalledWith(experimentId);
      expect(mockGetExperiments).not.toHaveBeenCalled();
      expect(result).toEqual(expect.objectContaining(mockExperiment));
    });

    it('should handle API errors gracefully', async () => {
      // Setup mock to throw an error
      const apiError = new Error('API connection failed');
      mockGetExperiment.mockRejectedValueOnce(apiError);

      // Call the function and expect it to reject with the same error
      await expect(
        getExperiment({ id: experimentId, projectName })
      ).rejects.toThrow(apiError);
    });
  });

  describe('getExperiments', () => {
    it('should return experiments', async () => {
      // Call the function
      const result = await getExperiments(projectName);

      // Verify the correct method was called
      expect(mockGetExperiments).toHaveBeenCalled();
      expect(result).toEqual([mockExperiment]);
    });

    it('should return empty array when no experiments exist', async () => {
      mockGetExperiments.mockResolvedValueOnce([]);

      const result = await getExperiments(projectName);

      expect(result).toEqual([]);
      expect(mockGetExperiments).toHaveBeenCalled();
    });

    it('should handle API errors gracefully', async () => {
      const apiError = new Error('API connection failed');
      mockGetExperiments.mockRejectedValueOnce(apiError);

      await expect(getExperiments(projectName)).rejects.toThrow(apiError);
    });
  });

  describe('createExperiment', () => {
    it('should create an experiment if a valid name and projectName are provided', async () => {
      // Call the function
      const result = await createExperiment('Test Experiment', projectName);

      // Verify the correct method was called with the right name
      expect(mockCreateExperiment).toHaveBeenCalledWith(
        'Test Experiment',
        undefined
      );
      expect(result).toEqual(expect.objectContaining(mockExperiment));
    });

    it('should throw an error if name is empty', async () => {
      await expect(createExperiment('', projectName)).rejects.toThrow(
        'A valid `name` must be provided to create an experiment'
      );
    });

    it('should pass the dataset to the api client', async () => {
      const dataset = { datasetId: 'dataset-id', versionIndex: 1 };

      await createExperiment('Test Experiment', projectName, dataset);

      expect(mockCreateExperiment).toHaveBeenCalledWith(
        'Test Experiment',
        dataset
      );
    });
  });

  describe('runExperiment - prompt', () => {
    it('should run an experiment with a dataset ID and promptTemplate', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        datasetId: 'test-dataset-id',
        promptTemplate: mockPromptTemplate,
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalledWith('Test Experiment', {
        datasetId: 'test-dataset-id',
        versionIndex: 1
      });
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });

    it('should run an experiment with a dataset ID, promptTemplate, and a metric', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        datasetId: 'test-dataset-id',
        promptTemplate: mockPromptTemplate,
        metrics: [GalileoMetrics.correctness],
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockGetScorersPageByLabels).toHaveBeenCalled();
      expect(mockCreateRunScorerSettings).toHaveBeenCalled();
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });

    it('should run an experiment with a dataset name and promptTemplate', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        datasetName: 'test-dataset',
        promptTemplate: mockPromptTemplate,
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalledWith('Test Experiment', {
        datasetId: 'test-dataset-id',
        versionIndex: 1
      });
      expect(mockGetDatasetByName).toHaveBeenCalled();
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });

    it('should run an experiment with a dataset object and promptTemplate', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        dataset: mockDataset,
        promptTemplate: mockPromptTemplate,
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalledWith('Test Experiment', {
        datasetId: 'test-dataset-id',
        versionIndex: 1
      });
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });

    it('should throw an error when array dataset is used with promptTemplate', async () => {
      await expect(
        runExperiment({
          name: 'Test Experiment',
          dataset: [{ country: 'France' }],
          promptTemplate: mockPromptTemplate,
          projectName
        })
      ).rejects.toThrow(
        'Prompt template experiments cannot be run with a local dataset'
      );
    });
  });

  describe('runExperiment - local', () => {
    const mockDate = new Date('2024-01-01T00:00:00.000Z');

    beforeEach(() => {
      jest.clearAllMocks();
      // Freeze the clock so the duration asserted on traces is deterministic.
      jest.useFakeTimers();
      jest.setSystemTime(mockDate);
    });

    afterEach(() => {
      jest.useRealTimers();
    });

    /**
     * Experiment function under test: advances the fake clock by 1 ms and
     * returns its input unchanged.
     */
    const identityFunction = async (input: Record<string, unknown>) => {
      jest.advanceTimersByTime(1);
      return input;
    };

    /**
     * Asserts that exactly one trace with one workflow span was ingested and
     * that both carry the values of `mockDatasetRow` and a 1 ms duration.
     */
    const verifyLocalExperimentTraces = (traces: Trace[]) => {
      expect(traces.length).toBe(1);
      expect(traces[0].input).toBe('{"country":"France"}');
      expect(traces[0].output).toEqual('{"country":"France"}');
      expect(traces[0].name).toBe('My Test Experiment');
      expect(traces[0].metrics).toEqual({ durationNs: 1_000_000 });
      expect(traces[0].datasetInput).toBe('{"country":"France"}');
      expect(traces[0].datasetOutput).toBe('{"value":"Paris"}');
      expect(traces[0].datasetMetadata).toEqual({ iteration: 'alpha' });

      const spans = traces[0].spans;
      expect(spans.length).toBe(1);
      expect(spans[0].type).toBe('workflow');
      expect(spans[0].input).toBe('{"country":"France"}');
      expect(spans[0].output).toEqual('{"country":"France"}');
      expect(spans[0].name).toBe('My Test Experiment');
      expect(spans[0].metrics).toEqual({ durationNs: 1_000_000 });
      expect(spans[0].datasetInput).toBe('{"country":"France"}');
      expect(spans[0].datasetOutput).toBe('{"value":"Paris"}');
      expect(spans[0].datasetMetadata).toEqual({ iteration: 'alpha' });
    };

    it('should run an experiment with a dataset ID and a function', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        datasetId: 'test-dataset-id',
        function: identityFunction,
        projectName
      });

      // The actual message includes experiment name and URL
      expect(result.message).toContain(
        'has completed and results are available at'
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockIngestTraces).toHaveBeenCalled();
      verifyLocalExperimentTraces(mockIngestTraces.mock.calls[0][0].traces);
    });

    it('should run an experiment with a dataset name and a function', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        datasetName: 'test-dataset',
        function: identityFunction,
        projectName
      });

      // The actual message includes experiment name and URL
      expect(result.message).toContain(
        'has completed and results are available at'
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockGetDatasetByName).toHaveBeenCalled();
      expect(mockIngestTraces).toHaveBeenCalled();
      verifyLocalExperimentTraces(mockIngestTraces.mock.calls[0][0].traces);
    });

    it('should run an experiment with a dataset object and a function', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        dataset: mockDataset,
        function: identityFunction,
        projectName
      });

      // The actual message includes experiment name and URL
      expect(result.message).toContain(
        'has completed and results are available at'
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockIngestTraces).toHaveBeenCalled();
      verifyLocalExperimentTraces(mockIngestTraces.mock.calls[0][0].traces);
    });

    it('should handle string metric names', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        datasetId: 'test-dataset-id',
        promptTemplate: mockPromptTemplate,
        metrics: ['Correctness'], // String metric name (label value)
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockGetScorersPageByLabels).toHaveBeenCalled();
      // Verify the correct scorer was found by name
      expect(mockCreateRunScorerSettings).toHaveBeenCalled();
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });

    it('should handle object metrics without version', async () => {
      const result = await runExperiment({
        name: 'Test Experiment',
        datasetId: 'test-dataset-id',
        promptTemplate: mockPromptTemplate,
        metrics: [{ name: 'Correctness' }],
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockGetScorersPageByLabels).toHaveBeenCalled();
      expect(mockCreateRunScorerSettings).toHaveBeenCalled();
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });

    it('should handle object metrics with version', async () => {
      // Setup specific mock for this test only
      mockGetScorerVersion.mockResolvedValueOnce({
        id: 'scorer-version-123',
        version: 3,
        scorer_id: 'scorer-123'
      });

      const result = await runExperiment({
        name: 'Test Experiment',
        datasetId: 'test-dataset-id',
        promptTemplate: mockPromptTemplate,
        metrics: [{ name: 'Correctness', version: 3 }], // Object metric with version
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockGetScorersPageByLabels).toHaveBeenCalled();
      expect(mockGetScorerVersion).toHaveBeenCalledWith('scorer-123', 3);
      expect(mockCreateRunScorerSettings).toHaveBeenCalled();
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });

    it('should handle multiple metrics with mixed formats', async () => {
      mockGetScorersPageByLabels.mockResolvedValue({
        scorers: [
          {
            id: 'scorer-Correctness',
            name: 'correctness',
            label: 'Correctness',
            scorer_type: ScorerTypes.preset
          },
          {
            id: 'scorer-Toxicity',
            name: 'toxicity',
            label: 'Toxicity',
            scorer_type: ScorerTypes.preset
          }
        ],
        nextStartingToken: null
      });

      const result = await runExperiment({
        name: 'Test Experiment',
        datasetId: 'test-dataset-id',
        promptTemplate: mockPromptTemplate,
        metrics: [
          'Correctness',
          { name: 'Toxicity' },
          { name: 'Correctness', version: 3 }
        ],
        projectName
      });

      expect(result).toHaveProperty(
        'message',
        promptRunJobCreatedSuccessMessage
      );
      expect(mockCreateExperiment).toHaveBeenCalled();
      expect(mockGetScorersPageByLabels).toHaveBeenCalled();
      expect(mockCreateRunScorerSettings).toHaveBeenCalled();
      expect(mockCreatePromptRunJob).toHaveBeenCalled();
    });
  });

  describe('runExperiment - errors', () => {
    it('should throw an error when neither function nor promptTemplate is provided', async () => {
      // Test invalid params that should trigger validation error
      // This intentionally omits both 'function' and 'promptTemplate' to test validation
      await expect(
        runExperiment({
          name: 'Test Experiment',
          datasetId: 'test-dataset-id',
          projectName
        } as unknown as RunExperimentParams<Record<string, unknown>>)
      ).rejects.toThrow(
        'Experiment not properly configured for either function or prompt template processing.'
      );
    });
  });

  describe('getExperiments with projectId', () => {
    it('should return experiments when projectId is provided', async () => {
      const result = await getExperiments(
        commonMockProject.name as string,
        commonMockProject.id
      );

      expect(result).toEqual([mockExperiment]);
      expect(mockGetExperiments).toHaveBeenCalled();
    });
  });

  describe('createExperiment with metrics', () => {
    it('should create an experiment with metrics', async () => {
      const result = await createExperiment(
        'Test Experiment',
        projectName,
        undefined,
        [GalileoMetrics.correctness]
      );

      expect(result).toEqual(expect.objectContaining(mockExperiment));
      expect(mockCreateExperiment).toHaveBeenCalled();
    });
  });

  describe('updateExperiment', () => {
    it('should update an experiment by id with projectId', async () => {
      const updateRequest: ExperimentUpdateRequest = {
        name: 'Updated Experiment Name'
      };

      const result = await updateExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectId: commonMockProject.id,
        updateRequest
      });

      expect(result).toEqual(expect.objectContaining(mockExperiment));
      expect(mockUpdateExperiment).toHaveBeenCalledWith(
        EXAMPLE_EXPERIMENT_ID,
        updateRequest
      );
    });

    it('should update an experiment by id with projectName', async () => {
      const updateRequest: ExperimentUpdateRequest = {
        name: 'Updated Experiment Name'
      };

      const result = await updateExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectName: commonMockProject.name as string,
        updateRequest
      });

      expect(result).toEqual(expect.objectContaining(mockExperiment));
      expect(mockUpdateExperiment).toHaveBeenCalledWith(
        EXAMPLE_EXPERIMENT_ID,
        updateRequest
      );
    });

    it('should throw an error when both projectId and projectName are missing', async () => {
      const updateRequest: ExperimentUpdateRequest = {
        name: 'Updated Experiment Name'
      };

      await expect(
        updateExperiment({
          id: EXAMPLE_EXPERIMENT_ID,
          updateRequest
        } as { id: string; updateRequest: ExperimentUpdateRequest })
      ).rejects.toThrow(
        'Either projectId or projectName must be provided to update an experiment'
      );
    });
  });

  describe('deleteExperiment', () => {
    it('should delete an experiment by id with projectId', async () => {
      await deleteExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectId: commonMockProject.id
      });

      expect(mockDeleteExperiment).toHaveBeenCalledWith(EXAMPLE_EXPERIMENT_ID);
    });

    it('should throw an error when projectId is missing', async () => {
      await expect(
        deleteExperiment({
          id: EXAMPLE_EXPERIMENT_ID,
          projectId: undefined as unknown as string
        })
      ).rejects.toThrow(
        'Experiment id and projectId are required to delete an experiment'
      );
    });

    it('should throw an error when id is missing', async () => {
      await expect(
        deleteExperiment({
          id: undefined as unknown as string,
          projectId: commonMockProject.id
        })
      ).rejects.toThrow(
        'Experiment id and projectId are required to delete an experiment'
      );
    });
  });

  describe('metricAggregates', () => {
    const scorerUuid = '550e8400-e29b-41d4-a716-446655440000';
    const mockAgg = { avg: 0.85, count: 8, p90: 0.92 };

    it('should populate metricAggregates from structuredAggregateMetrics on getExperiment', async () => {
      // Given: API returns an experiment with structuredAggregateMetrics keyed by UUID
      const experimentWithMetrics = {
        ...mockExperiment,
        structuredAggregateMetrics: { [scorerUuid]: mockAgg }
      };
      mockGetExperiment.mockResolvedValue(experimentWithMetrics);

      // When: getExperiment is called
      const result = await getExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectName: 'test-project'
      });

      // Then: metricAggregates mirrors structuredAggregateMetrics
      expect(result?.metricAggregates).toEqual({ [scorerUuid]: mockAgg });
    });

    it('should populate metricAggregates from structuredAggregateMetrics on createExperiment', async () => {
      // Given: API returns an experiment with structuredAggregateMetrics
      const experimentWithMetrics = {
        ...mockExperiment,
        structuredAggregateMetrics: {
          [scorerUuid]: mockAgg,
          cost: { avg: 0.01 }
        }
      };
      mockCreateExperiment.mockResolvedValue(experimentWithMetrics);

      // When: createExperiment is called
      const result = await createExperiment('new-experiment', 'test-project');

      // Then: both UUID and system metric keys are present
      expect(result.metricAggregates).toEqual({
        [scorerUuid]: mockAgg,
        cost: { avg: 0.01 }
      });
    });

    it('should set metricAggregates to undefined when structuredAggregateMetrics is absent', async () => {
      // Given: experiment has no structuredAggregateMetrics
      mockGetExperiment.mockResolvedValue({ ...mockExperiment });

      // When: getExperiment is called
      const result = await getExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectName: 'test-project'
      });

      // Then: metricAggregates is undefined
      expect(result?.metricAggregates).toBeUndefined();
    });
  });

  describe('aggregateMetrics deprecation', () => {
    it('should still return the original value when aggregateMetrics is accessed (backward-compat)', async () => {
      // Given: API returns an experiment with aggregateMetrics
      const experimentWithMetrics = {
        ...mockExperiment,
        aggregateMetrics: { average_correctness: 0.9 }
      };
      mockGetExperiment.mockResolvedValue(experimentWithMetrics);

      // When: getExperiment is called and aggregateMetrics is accessed
      const result = await getExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectName: 'test-project'
      });

      // Then: the value is still accessible (backward-compatible)
      // Note: sdkLogger.warn fires internally but is not easily assertable without
      // mocking the galileo-generated module at module load time.
      expect(result?.aggregateMetrics).toEqual({ average_correctness: 0.9 });
    });

    it('should expose aggregateMetrics as a getter (not a plain property)', async () => {
      // Given: API returns an experiment
      mockGetExperiment.mockResolvedValue(mockExperiment);

      // When: the enriched experiment is returned
      const result = await getExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectName: 'test-project'
      });

      // Then: aggregateMetrics is defined as a getter on the object
      const descriptor = Object.getOwnPropertyDescriptor(
        result,
        'aggregateMetrics'
      );
      expect(descriptor?.get).toBeDefined();
    });
  });

  describe('getExperimentColumns', () => {
    const mockColumns = {
      columns: [
        {
          id: 'metrics/550e8400-e29b-41d4-a716-446655440000',
          label: 'Correctness',
          category: 'metric',
          dataType: 'floating_point',
          metricKeyAlias: 'correctness'
        },
        {
          id: 'metrics/duration_ns',
          label: 'Latency',
          category: 'metric',
          dataType: 'integer',
          metricKeyAlias: null
        }
      ]
    };

    it('should return the columns from the API', async () => {
      // Given: API returns available columns including UUID-keyed metric columns
      mockGetExperimentsAvailableColumns.mockResolvedValue(mockColumns);

      // When: getExperimentColumns is called
      const result = await getExperimentColumns({
        projectName: 'test-project'
      });

      // Then: the columns are returned and the API was called
      expect(result).toEqual(mockColumns);
      expect(mockGetExperimentsAvailableColumns).toHaveBeenCalledTimes(1);
    });

    it('should expose metricKeyAlias on UUID-keyed columns', async () => {
      // Given: a column with metricKeyAlias set
      mockGetExperimentsAvailableColumns.mockResolvedValue(mockColumns);

      // When: getExperimentColumns is called
      const result = await getExperimentColumns({
        projectName: 'test-project'
      });

      // Then: the UUID column has metricKeyAlias and the system metric column has null alias
      const cols = result.columns ?? [];
      const uuidCol = cols.find(
        (c) => c.id === 'metrics/550e8400-e29b-41d4-a716-446655440000'
      );
      const sysCol = cols.find((c) => c.id === 'metrics/duration_ns');
      expect(uuidCol?.metricKeyAlias).toBe('correctness');
      expect(sysCol?.metricKeyAlias).toBeNull();
    });
  });

  describe('getMetricAggregate', () => {
    const scorerUuid = '550e8400-e29b-41d4-a716-446655440000';
    const mockAgg = { avg: 0.85, count: 8, p90: 0.92 };
    const mockColumns = {
      columns: [
        {
          id: `metrics/${scorerUuid}`,
          label: 'Correctness',
          category: 'metric',
          dataType: 'floating_point',
          metricKeyAlias: 'correctness'
        }
      ]
    };

    /**
     * Configures the API mocks to return an experiment whose
     * `structuredAggregateMetrics` is keyed by `scorerUuid`, together with the
     * matching columns, then fetches that experiment through `getExperiment`.
     */
    async function experimentWithMetrics() {
      const apiExperiment = {
        ...mockExperiment,
        projectId: commonMockProject.id,
        structuredAggregateMetrics: { [scorerUuid]: mockAgg }
      };
      mockGetExperiment.mockResolvedValue(apiExperiment);
      mockGetExperimentsAvailableColumns.mockResolvedValue(mockColumns);

      return await getExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectName: 'test-project'
      });
    }

    it('should return undefined when metricAggregates is not populated', async () => {
      // Given: experiment with no structuredAggregateMetrics
      mockGetExperiment.mockResolvedValue({
        ...mockExperiment,
        projectId: commonMockProject.id
      });
      const result = await getExperiment({
        id: EXAMPLE_EXPERIMENT_ID,
        projectName: 'test-project'
      });

      // When/Then: getMetricAggregate returns undefined
      expect(await result?.getMetricAggregate?.('Correctness')).toBeUndefined();
    });

    it('should look up by UUID string directly without calling columns API', async () => {
      // Given: experiment with UUID-keyed metrics
      const experiment = await experimentWithMetrics();

      // When: looking up by raw UUID
      const result = await experiment?.getMetricAggregate?.(scorerUuid);

      // Then: aggregate returned without calling getExperimentsAvailableColumns
      expect(result).toEqual(mockAgg);
      expect(mockGetExperimentsAvailableColumns).not.toHaveBeenCalled();
    });

    it('should look up by GalileoMetrics value (human-readable label)', async () => {
      // Given: experiment with UUID-keyed metrics and GalileoMetrics.correctness == "Correctness"
      const experiment = await experimentWithMetrics();

      // When: looking up by GalileoMetrics value (which IS the label string)
      const result = await experiment?.getMetricAggregate?.('Correctness');

      // Then: aggregate returned via label match
      expect(result).toEqual(mockAgg);
    });

    it('should look up by metricKeyAlias as fallback', async () => {
      // Given: experiment with UUID-keyed metrics
      const experiment = await experimentWithMetrics();

      // When: looking up by legacy snake_case alias
      const result = await experiment?.getMetricAggregate?.('correctness');

      // Then: aggregate returned via alias fallback
      expect(result).toEqual(mockAgg);
    });

    it('should return undefined for an unknown metric', async () => {
      // Given: experiment with UUID-keyed metrics
      const experiment = await experimentWithMetrics();

      // When: looking up a metric that does not exist
      const result = await experiment?.getMetricAggregate?.('Toxicity');

      // Then: undefined is returned
      expect(result).toBeUndefined();
    });
  });
});