import { OpenAssistantTool } from '@openassistant/utils';
import { z } from 'zod';
import { GetValues } from '../types';

/**
 * Zod schema type describing the arguments accepted by the `dataClassify` tool.
 *
 * `k` and `hinge` are optional numeric fields, matching the `k?: number` and
 * `hinge?: number` members of {@link DataClassifyLlmResult}.
 */
export type DataClassifyFunctionArgs = z.ZodObject<{
  datasetName: z.ZodString;
  variableName: z.ZodString;
  method: z.ZodEnum<
    [
      'quantile',
      'natural breaks',
      'equal interval',
      'percentile',
      'box',
      'standard deviation',
      'unique values',
    ]
  >;
  k: z.ZodOptional<z.ZodNumber>;
  hinge: z.ZodOptional<z.ZodNumber>;
}>;

/**
 * Outcome of a classification as reported through the `llmResult` channel.
 *
 * `result` carries the echoed request parameters together with the computed
 * `breaks`; `error` and `instruction` are optional free-form strings.
 */
export type DataClassifyLlmResult = {
  success: boolean;
  result?: {
    datasetName: string;
    variableName: string;
    method: string;
    k?: number;
    hinge?: number;
    breaks: number[];
  };
  error?: string;
  instruction?: string;
};

/**
 * Extra data produced alongside the LLM-facing result.
 *
 * Unlike {@link DataClassifyLlmResult}, `k` is required here.
 */
export type DataClassifyAdditionalData = {
  originalDatasetName: string;
  variableName: string;
  method: string;
  k: number;
  hinge?: number;
  breaks: number[];
};

/**
 * Context that the host application must supply to the tool: a `getValues`
 * callback used to look up the values of a variable in a dataset.
 */
export type DataClassifyFunctionContext = {
  getValues: GetValues;
};

/**
 * ## dataClassify Tool
 *
 * This tool is used to classify numerical data into k bins or classes using various statistical methods.
 * It returns break points that can be used to categorize continuous data into discrete intervals.
 *
 * ### Classification Methods
 *
 * The classification method can be one of the following types:
 * - **quantile**: Divides data into equal-sized groups based on quantiles
 * - **natural breaks**: Uses Jenks' algorithm to minimize within-group variance
 * - **equal interval**: Creates intervals of equal width across the data range
 * - **percentile**: Uses percentile-based breaks (25th, 50th, 75th percentiles)
 * - **box**: Uses box plot statistics (hinge = 1.5 or 3.0)
 * - **standard deviation**: Creates breaks based on standard deviation intervals
 * - **unique values**: Returns all unique values in the dataset
 *
 * ### Parameters
 * - `datasetName`: Name of the dataset containing the variable
 * - `variableName`: Name of the numerical variable to classify
 * - `method`: Classification method (see above)
 * - `k`: Number of bins/classes (required for quantile, natural breaks, equal interval)
 * - `hinge`: Hinge value for box method (default: 1.5)
 *
 * **Example user prompts:**
 * - "Can you classify the population data into 5 classes using natural breaks?"
 * - "Classify the income variable using quantile method with 4 bins"
 * - "Use box plot method to classify the housing prices"
 *
 * ### Example
 * ```typescript
 * import { dataClassify } from "@openassistant/geoda";
 * import { convertToVercelAiTool } from "@openassistant/utils";
 *
 * const classifyTool = {
 *   ...dataClassify,
 *   context: {
 *     getValues: async (datasetName: string, variableName: string) => {
 *       // Implementation to retrieve values from your data source
 *       return [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20];
 *     },
 *   },
 * };
 *
 * // Usage with AI model
 * const result = await generateText({
 *   model: yourModel,
 *   prompt: 'Can you classify the population data into 5 classes using natural breaks?',
 *   tools: { dataClassify: convertToVercelAiTool(classifyTool) },
 * });
 * ```
 */
export declare const dataClassify: OpenAssistantTool;
// NOTE(review): `dataClassify` is declared with the bare `OpenAssistantTool`
// type. If that type is generic, parameterizing it with the DataClassify*
// types above would give callers precise typing — confirm against the
// definition in '@openassistant/utils' before changing.

/** Type of the exported `dataClassify` tool object. */
export type DataClassifyTool = typeof dataClassify;

/**
 * Runs a classification outside of the tool wrapper.
 *
 * @param options - Classification request: dataset and variable names, the
 *   classification `method`, the number of classes `k`, an optional `hinge`,
 *   and the `getValues` callback used to obtain the data.
 * @returns A promise resolving to one of two shapes: an object whose
 *   `llmResult` carries `result` together with `additionalData`, or an object
 *   whose `llmResult` carries an `error` string and no `additionalData`.
 *
 * NOTE(review): `k` is required here although the tool schema marks it
 * optional — presumably callers must supply a value for methods that ignore
 * it; verify against the implementation.
 */
export declare function runDataClassify({
  datasetName,
  variableName,
  method,
  k,
  hinge,
  getValues,
}: {
  datasetName: string;
  variableName: string;
  method: string;
  k: number;
  hinge?: number;
  getValues: GetValues;
}): Promise<
  | {
      llmResult: {
        success: boolean;
        result: any;
        error?: undefined;
      };
      additionalData: any;
    }
  | {
      llmResult: {
        success: boolean;
        error: string;
        result?: undefined;
      };
      additionalData?: undefined;
    }
>;