// SPDX-License-Identifier: MIT
// Copyright contributors to the openassistant project

import {
  OpenAssistantTool,
  OpenAssistantExecuteFunctionResult,
  generateId,
} from '@openassistant/utils';
import {
  CheckGeometryType,
  SpatialGeometry,
  spatialJoin as spatialJoinFunc,
  SpatialJoinGeometryType,
} from '@geoda/core';
import { z } from 'zod';
import { binaryToGeojson } from '@loaders.gl/gis';

import { applyJoin } from './apply-join';
import { GetValues, GetGeometries } from '../types';

/**
 * Zod schema type of the arguments accepted by the spatialJoin tool.
 */
export type SpatialJoinFunctionArgs = z.ZodObject<{
  rightDatasetName: z.ZodString;
  leftDatasetName: z.ZodString;
  joinVariables: z.ZodOptional<
    z.ZodArray<
      z.ZodObject<{
        variableName: z.ZodString;
        operator: z.ZodEnum<
          ['sum', 'mean', 'min', 'max', 'median', 'count', 'unique']
        >;
      }>
    >
  >;
}>;

/**
 * Result handed back to the LLM. On success it carries a two-row preview of
 * every joined column, the generated dataset name and the join statistics;
 * on failure only `success: false` and `error` are set.
 */
export type SpatialJoinLlmResult = {
  success: boolean;
  firstTwoRows?: {
    [x: string]: (number | string)[];
  }[];
  datasetName?: string;
  result?: string;
  joinStats?: {
    totalCount: number;
    minCount: number;
    maxCount: number;
    averageCount: number;
  };
  error?: string;
};

/**
 * Additional (non-LLM) data produced by the tool. Besides the named fields,
 * `runSpatialJoin` stores `joinResult`, `joinValues`, `joinVariables` and the
 * joined dataset itself (keyed by the generated dataset name) through the
 * index signature.
 */
export type SpatialJoinAdditionalData = {
  rightDatasetName: string;
  leftDatasetName: string;
  joinVariableNames?: string[];
  joinOperators?: string[];
  datasetName: string;
  [datasetName: string]: unknown;
};

/**
 * Callbacks the host application supplies to the tool.
 * `getGeometries` is always required; `getValues` is only needed when
 * `joinVariables` are requested.
 */
export type SpatialJoinFunctionContext = {
  getGeometries: GetGeometries;
  getValues?: GetValues;
  saveAsDataset?: (datasetName: string, data: Record<string, unknown>) => void;
};

/**
 * ## spatialJoin Tool
 *
 * This tool performs spatial join operations between two geometric datasets, combining attributes based on spatial relationships.
 * It's useful for overlaying different types of spatial data and aggregating values across spatial boundaries.
 *
 * ### Spatial Join Operations
 *
 * The tool supports various join operations for aggregating values:
 * - **sum**: Sum of values in overlapping geometries
 * - **mean**: Average of values in overlapping geometries
 * - **min**: Minimum value in overlapping geometries
 * - **max**: Maximum value in overlapping geometries
 * - **median**: Median value in overlapping geometries
 * - **count**: Count of overlapping geometries
 * - **unique**: Count of unique values in overlapping geometries
 *
 * ### Parameters
 * - `rightDatasetName`: Name of the dataset providing the geometries to join (source)
 * - `leftDatasetName`: Name of the dataset receiving the joined data (target)
 * - `joinVariables`: Array of variables to join with their aggregation operators (optional)
 *
 * **Example user prompts:**
 * - "Can you join the population data with county boundaries?"
 * - "Join crime incidents to police districts using sum aggregation"
 * - "Overlay school locations with census tracts and count schools per tract"
 *
 * ### Example
 * ```typescript
 * import { spatialJoin } from "@openassistant/geoda";
 * import { convertToVercelAiTool } from "@openassistant/utils";
 *
 * const joinTool = {
 *   ...spatialJoin,
 *   context: {
 *     getGeometries: async (datasetName: string) => {
 *       // Implementation to retrieve geometries from your data source
 *       return geometries;
 *     },
 *     getValues: async (datasetName: string, variableName: string) => {
 *       // Implementation to retrieve values from your data source
 *       return [100, 200, 150, 300, 250];
 *     },
 *   },
 * };
 *
 * const result = await generateText({
 *   model: openai('gpt-4.1', { apiKey: key }),
 *   prompt: 'Can you join the population data with county boundaries?',
 *   tools: { spatialJoin: convertToVercelAiTool(joinTool) },
 * });
 * ```
 *
 * :::note
 * The left dataset and right dataset should be different. joinVariables should come from the right dataset.
 * :::
 */
export const spatialJoin: OpenAssistantTool<
  SpatialJoinFunctionArgs,
  SpatialJoinLlmResult,
  SpatialJoinAdditionalData,
  SpatialJoinFunctionContext
> = {
  name: 'spatialJoin',
  description: `Spatial join geometries two geometric datasets. For example: 1. to get the number of points in polygons, "right dataset = points" and "left dataset = polygons" 2. to check which point belongs to which polygon, "right dataset = polygons" and "left dataset = points" IMPORTANT: 1. left dataset and right dataset should be different. 2. joinVariables should come from the right dataset.`,
  parameters: z.object({
    rightDatasetName: z.string(),
    leftDatasetName: z.string(),
    joinVariables: z
      .array(
        z.object({
          variableName: z.string(),
          operator: z.enum([
            'sum',
            'mean',
            'min',
            'max',
            'median',
            'count',
            'unique',
          ]),
        })
      )
      .optional(),
  }),
  execute: executeSpatialJoin,
  // Placeholder context: every callback throws until the host application
  // overrides it with a real implementation.
  context: {
    getGeometries: () => {
      throw new Error('getGeometries() of SpatialJoinTool is not implemented');
    },
    getValues: () => {
      throw new Error('getValues() of SpatialJoinTool is not implemented');
    },
    saveAsDataset: () => {
      throw new Error('saveAsDataset() of SpatialJoinTool is not implemented');
    },
  },
};

export type SpatialJoinTool = typeof spatialJoin;

/**
 * Runtime shape of the tool arguments as verified by `isSpatialJoinArgs`.
 * `joinVariables` is optional because the guard accepts arguments without it.
 */
type SpatialJoinArgs = {
  rightDatasetName: string;
  leftDatasetName: string;
  joinVariables?:
    | {
        variableName: string;
        operator: string;
      }[]
    | null;
};

/**
 * Type guard for the tool arguments.
 *
 * Both dataset names must be strings; `joinVariables`, when present and not
 * nullish, must be an array so later `.length` / `.map` calls are safe.
 *
 * @param args - the raw arguments passed to the tool
 * @returns true when `args` can be treated as `SpatialJoinArgs`
 */
function isSpatialJoinArgs(args: unknown): args is SpatialJoinArgs {
  return (
    typeof args === 'object' &&
    args !== null &&
    'rightDatasetName' in args &&
    'leftDatasetName' in args &&
    typeof args.rightDatasetName === 'string' &&
    typeof args.leftDatasetName === 'string' &&
    (!('joinVariables' in args) ||
      args.joinVariables == null ||
      Array.isArray(args.joinVariables))
  );
}

/**
 * Type guard for the tool context: only `getGeometries` is mandatory.
 *
 * @param context - the context object supplied with the tool call
 * @returns true when `context` exposes a `getGeometries` function
 */
function isSpatialJoinContext(
  context: unknown
): context is SpatialJoinFunctionContext {
  return (
    typeof context === 'object' &&
    context !== null &&
    'getGeometries' in context &&
    typeof context.getGeometries === 'function'
  );
}

/**
 * Tool entry point: validates the arguments and the context, then delegates
 * to `runSpatialJoin`.
 *
 * @param args - the tool arguments (see `SpatialJoinFunctionArgs`)
 * @param options - the tool call options; `options.context` must provide `getGeometries`
 * @returns the LLM result plus additional data produced by `runSpatialJoin`
 * @throws Error when the arguments or the context fail validation
 */
async function executeSpatialJoin(
  args: z.infer<SpatialJoinFunctionArgs>,
  options?: {
    toolCallId: string;
    abortSignal?: AbortSignal;
    context?: SpatialJoinFunctionContext;
  }
): Promise<
  OpenAssistantExecuteFunctionResult<
    SpatialJoinLlmResult,
    SpatialJoinAdditionalData
  >
> {
  if (!isSpatialJoinArgs(args)) {
    throw new Error('Invalid arguments for spatialJoin tool');
  }

  if (!options?.context || !isSpatialJoinContext(options.context)) {
    throw new Error('Invalid context for spatialJoin tool');
  }

  const { rightDatasetName, leftDatasetName, joinVariables } = args;
  const { getGeometries, getValues } = options.context;

  // getValues stays optional here; runSpatialJoin reports a clear error if it
  // is missing while join variables were requested.
  return runSpatialJoin({
    rightDatasetName,
    leftDatasetName,
    joinVariables: joinVariables ?? undefined,
    getGeometries,
    getValues,
  });
}

/**
 * Run the spatial join between two datasets and aggregate the requested
 * variables of the right dataset onto the geometries of the left dataset.
 *
 * Every failure inside the function is caught and reported as
 * `{ llmResult: { success: false, error } }` instead of being thrown.
 *
 * @param params.rightDatasetName - the dataset whose geometries and values are joined
 * @param params.leftDatasetName - the dataset that receives the joined values
 * @param params.joinVariables - optional variables of the right dataset with their aggregation operator
 * @param params.getGeometries - callback returning the geometries of a dataset
 * @param params.getValues - callback returning the values of a variable; required only when `joinVariables` is not empty
 * @returns the LLM result and, on success, the additional data with the joined dataset
 */
export async function runSpatialJoin({
  rightDatasetName,
  leftDatasetName,
  joinVariables,
  getGeometries,
  getValues,
}: {
  rightDatasetName: string;
  leftDatasetName: string;
  joinVariables?: {
    variableName: string;
    operator: string;
  }[];
  getGeometries: GetGeometries;
  getValues?: GetValues;
}) {
  try {
    // The two lookups are independent, so fetch them concurrently
    const [rightGeometries, leftGeometries] = await Promise.all([
      getGeometries(rightDatasetName),
      getGeometries(leftDatasetName),
    ]);

    if (!rightGeometries || rightGeometries.length === 0) {
      throw new Error('First dataset geometries not found');
    }
    if (!leftGeometries || leftGeometries.length === 0) {
      throw new Error('Second dataset geometries not found');
    }

    const result = await spatialJoinFunc({
      leftGeometries,
      rightGeometries,
    });

    // get basic statistics of the result for LLM
    const basicStatistics = getBasicStatistics(result);

    // The "Count" column holds the length of each row of the join result
    const joinValues: Record<string, (number | string)[]> = {
      Count: result.map((row) => row.length),
    };

    // aggregate the values of the right dataset if joinVariables is provided
    if (joinVariables && joinVariables.length > 0) {
      if (!getValues) {
        throw new Error(
          'getValues() is required when joinVariables are provided'
        );
      }
      // bind to a const so the narrowed type is kept inside the async callbacks
      const fetchValues = getValues;

      const joinedColumns = await Promise.all(
        joinVariables.map(async ({ variableName, operator }) => {
          try {
            const values = (await fetchValues(
              rightDatasetName,
              variableName
            )) as number[];
            let column: (number | string)[];
            try {
              // apply join to values in each row
              column = result.map((row) =>
                applyJoin(
                  operator,
                  row.map((index) => values[index])
                )
              );
            } catch {
              // if the join operator is not supported, return an array with first value of the variable if exists
              // NOTE(review): this yields a column of at most one element, so
              // it is not aligned with the left geometries; confirm the intent.
              column = values.length > 0 ? [values[0]] : [];
            }
            return { variableName, column };
          } catch (error) {
            throw new Error(
              `Error applying join operator to variable ${variableName}: ${error}`
            );
          }
        })
      );

      // Assign after all lookups settled so the column order follows
      // joinVariables rather than the order in which getValues resolved.
      for (const { variableName, column } of joinedColumns) {
        joinValues[variableName] = column;
      }
    }

    // append joinValues to the left geometries
    const leftGeometriesWithJoinValues = appendJoinValuesToGeometries(
      leftGeometries,
      joinValues
    );

    // name under which the joined dataset is returned
    const outputDatasetName = `join_${generateId()}`;

    // joinValues is a record of variable names and their values:
    // return the first two rows of each variable, including the variable name
    const columns = Object.keys(joinValues);
    const firstTwoRows = columns.map((column) => ({
      [column]: joinValues[column].slice(0, 2),
    }));

    return {
      llmResult: {
        success: true,
        firstTwoRows,
        datasetName: outputDatasetName,
        result: `Spatial count function executed successfully and the joined dataset is saved in DatasetName: ${outputDatasetName}.`,
        joinStats: basicStatistics,
      },
      additionalData: {
        joinResult: result,
        joinValues,
        rightDatasetName,
        leftDatasetName,
        joinVariables,
        datasetName: outputDatasetName,
        [outputDatasetName]: leftGeometriesWithJoinValues,
      },
    };
  } catch (error) {
    return {
      llmResult: {
        success: false,
        error: error instanceof Error ? error.message : 'Unknown error occurred',
      },
    };
  }
}

/**
 * Get basic statistics of the result
 *
 * For an empty result all statistics are 0 (instead of Infinity / -Infinity /
 * NaN, which do not survive JSON serialization).
 *
 * @param result - the result of the spatial join, one array of indices per row
 * @returns - the number of rows and the min / max / average row length
 */
export function getBasicStatistics(
  result: number[][]
): NonNullable<SpatialJoinLlmResult['joinStats']> {
  const totalCount = result.length;
  if (totalCount === 0) {
    return { totalCount: 0, minCount: 0, maxCount: 0, averageCount: 0 };
  }

  let minCount = Infinity;
  let maxCount = -Infinity;
  let sumCount = 0;

  for (const row of result) {
    const rowLength = row.length;
    minCount = Math.min(minCount, rowLength);
    maxCount = Math.max(maxCount, rowLength);
    sumCount += rowLength;
  }

  return {
    totalCount,
    minCount,
    maxCount,
    averageCount: sumCount / totalCount,
  };
}

/**
 * Attach the joined values to the geometries they belong to.
 *
 * The output depends on the geometry type reported by `CheckGeometryType`:
 * - BinaryFeatureCollection: converted to GeoJSON, values added to the feature properties
 * - GeoJsonFeature: values added to the feature properties
 * - ArcLayerData: csv style rows (`type: 'rowObjects'`) with a header row
 * - PointLayerData: a GeoJSON FeatureCollection of points built from `position`
 *
 * @param geometries - the geometries of the left dataset
 * @param joinValues - the joined columns, keyed by variable name and indexed like the geometries
 * @returns the geometries with the joined values, tagged with the content type
 * @throws Error when the geometry type is not supported
 */
export function appendJoinValuesToGeometries(
  geometries: SpatialGeometry,
  joinValues: Record<string, (number | string)[]>
) {
  const geometryType = CheckGeometryType(geometries);
  const variableNames = Object.keys(joinValues);

  // Collect the joined value of every variable for the geometry at `index`
  const joinedProperties = (index: number) =>
    Object.fromEntries(
      variableNames.map((name) => [name, joinValues[name][index]])
    );

  switch (geometryType) {
    case SpatialJoinGeometryType.BinaryFeatureCollection: {
      // convert binary to geojson and flatten the results
      const features: GeoJSON.Feature[] = geometries.flatMap((binary) => {
        const result = binaryToGeojson(binary);
        return Array.isArray(result) ? result : [result];
      });
      // create a geojson feature collection and append joinValues to the features
      const featureCollection: GeoJSON.FeatureCollection = {
        type: 'FeatureCollection',
        features: features.map((feature, index) => ({
          ...feature,
          properties: {
            ...feature.properties,
            ...joinedProperties(index),
          },
        })),
      };
      return {
        type: 'geojson',
        content: featureCollection,
      };
    }
    case SpatialJoinGeometryType.GeoJsonFeature: {
      // append joinValues to the properties of the features
      const featuresWithJoinValues = geometries.map((feature, index) => ({
        ...feature,
        properties: {
          ...feature.properties,
          ...joinedProperties(index),
        },
      }));
      const result = {
        type: 'FeatureCollection',
        features: featuresWithJoinValues,
      };
      return {
        type: 'geojson',
        content: result,
      };
    }
    case SpatialJoinGeometryType.ArcLayerData: {
      // return a csv style array of features with joinValues
      const featuresWithJoinValues = geometries.map((feature, index) => [
        // feature is ArcLayerData: {source: [], target: [], index: number}
        feature,
        ...variableNames.map((name) => joinValues[name][index]),
      ]);
      // put the variable names in front as the header row
      featuresWithJoinValues.unshift(['geometry', ...variableNames]);
      return {
        type: 'rowObjects',
        content: featuresWithJoinValues,
      };
    }
    case SpatialJoinGeometryType.PointLayerData: {
      // feature is PointLayerData: {position: [], index: number}
      // convert geometries to FeatureCollection
      const result: GeoJSON.FeatureCollection = {
        type: 'FeatureCollection',
        features: geometries.map((feature, index) => ({
          type: 'Feature' as const,
          geometry: {
            type: 'Point' as const,
            coordinates: feature.position,
          },
          properties: joinedProperties(index),
        })),
      };
      return {
        type: 'geojson',
        content: result,
      };
    }
    default:
      throw new Error('Unsupported geometry type');
  }
}