/**
 * @fileoverview Shared building blocks for `find_*` tools — Zod fragments for
 * common inputs (alias, loadLimit, extraFilters), utilities to merge named
 * filters with the passthrough map, a generic distribution aggregator, and
 * the spillover handler that turns a "too many rows" result into a canvas
 * dataframe handle.
 *
 * NOTE(review): generic type parameters/arguments appear to be missing
 * throughout this declaration file (bare `Promise`, `Record`, `z.ZodOptional`,
 * `z.infer`, and interfaces that use `T` without declaring it). Presumably
 * they were lost when this text was extracted — confirm against the emitted
 * `.d.ts` / the `.ts` source before relying on these signatures.
 *
 * @module mcp-server/tools/shared/find-helpers
 */
import { type Context, type HandlerContext, z } from '@cyanheads/mcp-ts-core';
import type { ServerConfig } from '../../../config/server-config.js';
import type { BrapiClient, BrapiEnvelope, BrapiPagination, BrapiRequestOptions, ResolvedAuth } from '../../../services/brapi-client/index.js';
import type { BrapiDialect, DialectAdaptation } from '../../../services/brapi-dialect/index.js';
import type { CanvasBridge, RegisterDataframeResult } from '../../../services/canvas-bridge/index.js';
import type { CapabilityProfile } from '../../../services/capability-registry/types.js';
import type { RegisteredServer } from '../../../services/server-registry/index.js';
/** True when the thrown value is an upstream 404 surfaced by the BrAPI client. */
export declare function isUpstreamNotFound(err: unknown): boolean;
/**
 * Resolve a registered BrAPI connection by alias, throwing the calling tool's
 * `'unknown_alias'` contract on miss so the wire-level error carries
 * `data.recovery.hint` from the tool's `errors[]` entry. Use this instead of
 * `registry.get(...)` whenever the calling tool/resource declares the
 * `unknown_alias` reason — `registry.get` throws a bare `notFound()` factory
 * that the contract recovery resolver can't reach.
 *
 * Every caller MUST declare an `errors[]` entry whose `reason` is
 * `'unknown_alias'`; the `HandlerContext<'unknown_alias'>` parameter type
 * enforces this at compile time.
 */
export declare function requireRegisteredConnection(ctx: HandlerContext<'unknown_alias'>, alias: string | undefined): Promise;
/** Upper cap on how many rows we'll pull for canvas dataframe spillover per call. */
export declare const MAX_SPILLOVER_ROWS = 50000;
/** Hard cap on how many BrAPI pages we'll traverse when building a dataframe. */
export declare const MAX_SPILLOVER_PAGES = 50;
/** Shared optional Zod fragment for the `alias` input of `find_*` tools. */
export declare const AliasInput: z.ZodOptional;
/** Shared optional Zod fragment for the `loadLimit` input of `find_*` tools. */
export declare const LoadLimitInput: z.ZodOptional;
/** Shared optional Zod fragment for the `extraFilters` passthrough map of `find_*` tools. */
export declare const ExtraFiltersInput: z.ZodOptional>;
/**
 * Merge named params with the user-supplied extraFilters map. Named params
 * win on conflict; conflicts are surfaced as warnings (pushed onto the
 * `warnings` array supplied by the caller).
 */
export declare function mergeFilters(named: Record, extra: Record | undefined, warnings: string[]): Record;
/**
 * Apply the dialect's GET-filter adapter, append warnings, and fail with a
 * typed `all_filters_dropped` error when the dialect dropped every supplied
 * filter — the call would otherwise silently widen to the unfiltered baseline.
 * Tools that call this MUST declare `'all_filters_dropped'` in their `errors[]`
 * contract; the helper looks up the recovery hint via `ctx.recoveryFor` and
 * spreads it into `data` so the wire-level shape stays consistent across the
 * find_* surface. The bare-baseline call (no filters supplied) is exempt:
 * `dropped` is empty so the all-dropped predicate is false.
 *
 * Returns the full {@link DialectAdaptation} so callers can read
 * `requiresEscalation` and prefer POST /search when GET would lose multi-value
 * semantics (#15). Most call sites destructure `.filters` for the route
 * resolver and forward `.requiresEscalation` verbatim.
 */
export declare function applyDialectFiltersOrFail(ctx: HandlerContext<'all_filters_dropped'>, dialect: BrapiDialect, endpoint: string, filters: Readonly>, warnings: string[]): DialectAdaptation;
/**
 * Transport chosen for a find call, discriminated on `kind`:
 * - `'get'` — a GET against `path` carrying `filters`.
 * - `'search'` — a POST search for `noun` carrying `searchBody`.
 *
 * Produced by {@link resolveFindRoute}.
 */
export type FindRoute = {
    filters: Record;
    kind: 'get';
    path: string;
    service: string;
} | {
    kind: 'search';
    noun: string;
    searchBody: Record;
    service: string;
};
/** Input to {@link resolveFindRoute}. */
export interface ResolveFindRouteInput {
    dialect: BrapiDialect;
    endpoint: string;
    filters: Record;
    profile: CapabilityProfile;
    /**
     * Set by the dialect adapter when at least one multi-value array filter
     * was downcast to a single scalar on the GET wire. When true and a working
     * POST `/search/{noun}` is advertised, the route resolver escalates to
     * search to preserve the original multi-value semantics — without this
     * the agent would silently see a result set narrowed to the first value
     * only. See {@link DialectAdaptation.requiresEscalation}.
     */
    requiresEscalation?: boolean;
    searchBody: Record;
    searchNoun?: string;
    service?: string;
    warnings: string[];
}
/**
 * Select the transport for a curated find tool from the advertised capability
 * profile. GET stays the default because it has the widest real-world support
 * and can run through dialect-specific query-string adapters. When a server
 * exposes only POST `/search/{noun}`, the same semantic filters are sent as a
 * search body instead.
 *
 * Escalation: when the dialect signals `requiresEscalation` (one or more
 * multi-value filters got downcast on the GET path) AND POST /search is
 * available and not disabled, the resolver picks search — the search body
 * preserves the multi-value semantics the GET wire shape would have lost.
 */
export declare function resolveFindRoute(input: ResolveFindRouteInput): FindRoute;
/** Build request options with optional auth. */
export declare function buildRequestOptions(connection: RegisteredServer, params?: BrapiRequestOptions['params'], overrides?: BrapiRequestOptions): BrapiRequestOptions;
/**
 * Build request options for a companion enrichment call (FK lookup, count
 * probe, preflight). Companions are non-critical — they decorate the response
 * but never gate it — so they get a tighter wall-clock budget and zero
 * retries: a slow upstream surfaces as a single warning instead of stretching
 * the response by 4× the per-attempt timeout.
 *
 * The dialect is threaded through so the BrapiClient translates plural ID
 * filters (`studyDbIds`, `trialDbIds`, …) at the wire edge — the v0.4.7 fix
 * for the foundational dialect-bypass class of bug. Warnings (dialect drops,
 * downcasts) flow into the same array the tool surfaces in its envelope.
 */
export declare function companionRequestOptions(connection: RegisteredServer, dialect: BrapiDialect, config: ServerConfig, warnings: string[], params?: BrapiRequestOptions['params']): BrapiRequestOptions;
/**
 * Read `metadata.pagination.totalCount` from a BrAPI list endpoint with a
 * `pageSize: 1` probe. Returns `undefined` when the server omits the count.
 * Shared by the companion-count probes on the `get_*` tools.
 */
export declare function fetchTotalCount(client: BrapiClient, baseUrl: string, path: string, ctx: Context, options: BrapiRequestOptions): Promise;
/** Input to {@link fetchValidatedScopedCount}. */
export interface ValidatedScopedCountInput {
    client: BrapiClient;
    connection: RegisteredServer;
    ctx: Context;
    dialect: BrapiDialect;
    /** Output-field name used in the omission warning (e.g. `observationCount`). */
    label: string;
    /** Endpoint path to count against (e.g. `/observations`). */
    path: string;
    /** Human label for the scope in warnings (e.g. `study filter`). */
    scopeDescription: string;
    /** The scoping filter to apply (e.g. `{ studyDbIds: [studyDbId] }`). */
    scopeFilter: Record;
    /** Dialect resource segment for filter adaptation (e.g. `observations`). */
    service: string;
    /** Sink that receives the omission warning when the count is dropped. */
    warnings: string[];
}
/**
 * Fetch a study/germplasm-scoped total count, omitting it when the upstream
 * can't actually scope the query — so a per-entity count is never silently
 * reported as the server-wide total. Two guards, in order:
 *
 * 1. **Dropped filter** — if the dialect drops the scope filter entirely
 *    (the server doesn't honor it), the count would be the global total;
 *    skip the probe and warn.
 * 2. **Ignored filter** — run the scoped probe and an unfiltered baseline in
 *    parallel; if the totals match, the upstream silently ignored the filter
 *    (accepted it on the wire but returned the unfiltered set), so the
 *    "scoped" count is really the global total. Drop it and warn.
 *
 * Returns the trustworthy scoped total, or `undefined` (with a warning pushed
 * onto `warnings`) when neither guard can vouch for it. Both probes run at
 * `pageSize: 1` under the companion budget (tight timeout, no retries) — these
 * counts decorate a response, never gate it, so a slow or non-conforming
 * upstream surfaces as one missing count rather than a wrong one or a stall.
 *
 * The scope filter is adapted through the dialect here (so per-server key
 * translation / drops take effect) and sent with `buildRequestOptions` — never
 * re-threaded through the client, which would double-apply the dialect.
 */
export declare function fetchValidatedScopedCount(input: ValidatedScopedCountInput): Promise;
/**
 * Compute a frequency distribution for one field across a result set.
 * Accepts a field accessor that may return a scalar or array; arrays are
 * exploded. Returns `{value -> count}` sorted by count desc.
 */
export declare function computeDistribution(rows: readonly T[], accessor: (row: T) => string | readonly string[] | undefined | null): Record;
/**
 * Render the standardized header line for `find_*` tools. When a dataframe
 * spillover is present, surfaces the dataframe row count alongside the
 * in-context count and the upstream total — `{returned} of {total}` alone
 * hides the middle number and confuses readers when filters miss server-side
 * and the dataframe row count diverges from both.
 */
export declare function renderFindHeader(opts: {
    noun: string;
    alias: string;
    returnedCount: number;
    totalCount?: number;
    dataframe?: {
        rowCount: number;
        expiresAt?: string;
    } | undefined;
}): string;
/**
 * Render the filters-sent-to-server block, optionally translating
 * server-side keys to the user-facing parameter names declared by the
 * tool. Server keys without a user-facing alias (e.g. anything from
 * `extraFilters`) are rendered as-is. The label deliberately says "sent
 * to server" rather than "applied" — this is the wire-shape payload, not
 * a verified honor list. Drift between requested and honored values
 * surfaces in the warnings produced by `checkFilterMatchRates`.
 */
export declare function renderAppliedFilters(filters: Record, serverToUser?: Record): string;
/**
 * Render a distributions block in markdown. When `truncated` metadata is
 * supplied and `truncated.truncated` is true, a caveat line is prepended so
 * readers know the distribution covers only the fetched subset.
 */
export declare function renderDistributions(distributions: Record>, truncated?: { truncated: boolean; rowCount: number; totalCount: number; }): string;
/**
 * Project a `DataframeHandle | undefined` to the `truncated` metadata shape
 * expected by `renderDistributions`. Returns `undefined` when no dataframe is
 * present or when the dataframe was not truncated, so the caller doesn't need
 * to guard — just pass the result straight through.
 */
export declare function truncationMeta(dataframe: DataframeHandle | undefined): { truncated: boolean; rowCount: number; totalCount: number; } | undefined;
/**
 * Rows pulled by a page load together with the pagination facts the caller
 * needs to decide on spillover. Consumed as `MaybeSpillInput.firstPage`.
 */
export interface LoadedRows {
    /** True when we pulled a single page and the server has more. */
    hasMore: boolean;
    /** Pages actually consumed — useful for telemetry. */
    pagesFetched: number;
    /** The rows that were loaded. */
    rows: T[];
    /** Total rows advertised by the server (may be larger than `rows.length`). */
    totalCount?: number;
}
/**
 * Build a row mapper that runs `dialect.normalizeRow` per row, when the
 * dialect declares one. Returns `undefined` on dialects with no normalizer
 * — caller can pass the result straight to a `MaybeSpillInput.rowMapper`
 * field with no extra branching.
 */
export declare function dialectRowMapper>(dialect: BrapiDialect, endpoint: string): ((row: T) => T) | undefined;
/**
 * Pull rows up to `loadLimit` on a single page. If the server reports more
 * rows than the limit, leave the rest behind — callers decide whether to
 * spill via `spillToCanvas`.
 */
export declare function loadInitialPage(client: BrapiClient, connection: RegisteredServer, path: string, filters: Record, loadLimit: number, ctx: Context): Promise>;
/**
 * Pull the first page for either a GET list endpoint or POST /search route.
 * Optionally applies a per-row normalizer (typically built via
 * `dialectRowMapper`) to each row before returning so the caller sees the
 * canonical shape that matches what the dataframe materialization will hold.
 */
export declare function loadInitialFindPage(client: BrapiClient, connection: RegisteredServer, route: FindRoute, loadLimit: number, ctx: Context, options?: { normalizeRow?: (row: T) => T; }): Promise>;
/** Input to {@link spillToCanvas}. */
export interface SpillInput {
    bridge: CanvasBridge;
    client: BrapiClient;
    connection: RegisteredServer;
    ctx: Context;
    filters: Record;
    /** First-page rows already loaded. Avoids a re-fetch. */
    firstPage: T[];
    loadLimit: number;
    /**
     * Optional dialect-level per-row normalizer applied to each spilled page's
     * rows immediately after `extractRows` — used to coerce server-specific
     * "missing" encodings (e.g. SGN's `null` → `undefined`) into the canonical
     * shape the schemas expect. First-page rows are normalized by the loader
     * before they reach `spillToCanvas`; this is the parallel hook for the
     * page walk.
     */
    normalizeRow?: (row: T) => T;
    /** Optional request overrides for spillover page pulls. */
    pageRequestOptions?: BrapiRequestOptions;
    path: string;
    /** Optional route selected by resolveFindRoute; defaults to GET path + filters. */
    route?: FindRoute;
    /**
     * Optional client-side predicate applied to every row (first-page + spilled)
     * before persistence. When present, only rows that pass are registered on
     * the canvas and returned in `fullRows`. The unfiltered upstream total is
     * preserved separately on the LoadedRows envelope so distributions and
     * headers can still report the true upstream size.
     */
    rowFilter?: (row: T) => boolean;
    /**
     * Optional client-side transform applied to every row (first-page + spilled)
     * before any predicate filter runs. Used to normalize sparse / duplicated
     * upstream payloads (e.g. dedup'ing CassavaBase's 11×-repeated synonym
     * arrays) so the in-context view and the canvas dataframe see the same
     * cleaned shape. Runs before `rowFilter`.
     */
    rowMapper?: (row: T) => T;
    source: string;
    /** Total reported by the server on the first page. */
    totalCount: number;
}
/**
 * Outcome of a canvas spillover: the registered dataframe, the rows that
 * went into it, and how many pages were fetched.
 */
export interface SpillResult {
    dataframe: RegisterDataframeResult;
    /** Rows that were registered (post-filter when `rowFilter` was supplied). */
    fullRows: T[];
    pagesFetched: number;
}
/**
 * Shape of the dataframe handle returned inline by `find_*` tools. The
 * dataframe is the canvas table holding every row beyond `loadLimit`; query
 * it with `brapi_dataframe_query` for SQL access or describe it with
 * `brapi_dataframe_describe` for schema + provenance.
 */
export declare const DataframeHandleSchema: z.ZodObject<{
    tableName: z.ZodString;
    rowCount: z.ZodNumber;
    columns: z.ZodArray;
    columnLegend: z.ZodOptional>;
    createdAt: z.ZodString;
    expiresAt: z.ZodString;
    truncated: z.ZodOptional;
    maxRows: z.ZodOptional;
    totalCount: z.ZodOptional;
}, z.core.$strip>;
/**
 * Static type of the inline dataframe handle.
 * NOTE(review): presumably inferred from {@link DataframeHandleSchema}; the
 * type argument to `z.infer` is missing here — confirm.
 */
export type DataframeHandle = z.infer;
/**
 * Render a DataframeHandle as bullet lines, matching the existing find_* tool
 * format. Centralized so the truncated/maxRows fields surface consistently.
 * `expiresAt` is paired with a human-readable `expires in Xh / Xd` so the
 * agent doesn't have to subtract dates to know when the handle goes stale.
 */
export declare function renderDataframeHandle(handle: DataframeHandle): string[];
/**
 * Render an absolute `expiresAt` timestamp as a relative human label
 * (`expires in 24h`, `expires in 30m`, `expired 5m ago`). Coarsens to the
 * most useful unit so the value reads at a glance. The optional `now`
 * argument is the reference instant for the comparison.
 */
export declare function formatExpiresIn(expiresAt: string, now?: Date): string;
/** Project a `RegisterDataframeResult` to the inline handle shape. */
export declare function toDataframeHandle(result: RegisterDataframeResult, totalCount?: number): DataframeHandle;
/** Input to {@link maybeSpill}. */
export interface MaybeSpillInput {
    bridge: CanvasBridge;
    client: BrapiClient;
    connection: RegisteredServer;
    ctx: Context;
    filters: Record;
    firstPage: LoadedRows;
    loadLimit: number;
    /**
     * Optional dialect-level per-row normalizer applied to each spilled page's
     * rows. Mirrors `SpillInput.normalizeRow` — forwarded verbatim. First-page
     * rows must already be normalized by the loader; this only catches the
     * page walk.
     */
    normalizeRow?: (row: T) => T;
    path: string;
    route?: FindRoute;
    /**
     * Optional client-side predicate applied to every row before persistence.
     * Forwarded to `spillToCanvas`. When present and no spillover happens, the
     * first-page rows are also filtered before being returned.
     */
    rowFilter?: (row: T) => boolean;
    /**
     * Optional client-side transform applied to every row (first-page + spilled)
     * before persistence and before any `rowFilter`. See {@link SpillInput.rowMapper}.
     */
    rowMapper?: (row: T) => T;
    source: string;
    /**
     * Optional request overrides for spillover page pulls. Useful when a tool
     * wants dataframe materialization to run under a tighter latency budget
     * than the first page.
     */
    spillRequestOptions?: BrapiRequestOptions;
    /** Optional warning sink. When supplied, spillover failures degrade to rows-only output. */
    warnings?: string[];
}
/**
 * Row set plus the optional dataframe handle. `dataframe` is optional: the
 * no-spillover and degraded (rows-only) paths described on
 * {@link maybeSpill} / `MaybeSpillInput.warnings` carry rows without a handle.
 */
export interface MaybeSpillResult {
    dataframe?: DataframeHandle;
    /** Row set after `rowFilter` (when supplied), spilled or first-page only. */
    fullRows: T[];
}
/**
 * Wrap `spillToCanvas` with the "only spill when hasMore and totalCount >
 * loadLimit" guard that every `find_*` tool replicates. When no spillover is
 * needed, returns the first-page rows without fetching more (passed through
 * `rowFilter` when one is supplied — see `MaybeSpillInput.rowFilter`). When
 * it is, materializes the union as a canvas dataframe and returns both the
 * full set and the handle.
 */
export declare function maybeSpill>(input: MaybeSpillInput): Promise>;
/**
 * Pull every remaining page up to MAX_SPILLOVER_* caps, then materialize the
 * union as a canvas dataframe. Returns the dataframe metadata plus the full
 * row set (so callers can compute honest distributions from the whole result).
 */
export declare function spillToCanvas>(input: SpillInput): Promise>;
/** BrAPI list endpoints return `{data: T[], ...}`. Some omit the wrapper. */
export interface BrapiListResult {
    data?: T[];
    [key: string]: unknown;
}
/**
 * Return the row array from a list result that is either a
 * {@link BrapiListResult} envelope or already a bare array.
 */
export declare function extractRows(result: BrapiListResult | T[]): T[];
/**
 * Extract a homogeneous record-row set from a raw BrAPI envelope `result`.
 * Returns the array when `result` is itself a list of objects, or when
 * `result.data` is — covering both bare-array and BrAPI-list-envelope shapes.
 * Returns `null` for non-list shapes (single object, scalar, primitive
 * arrays) so callers — notably `raw_get` / `raw_search` — can skip spillover
 * and pass the upstream payload through unchanged. Empty arrays are
 * list-shaped but carry no rows; the caller uses `length === 0` to decide
 * whether to register a dataframe.
 */
export declare function extractListRows>(result: unknown): T[] | null;
/** Return the input as a non-empty string, or undefined. Used in distribution accessors. */
export declare function asString(value: unknown): string | undefined;
/**
 * Drop entries from a `synonyms` array that are structurally identical to an
 * entry already kept. Some Breedbase deployments (notably CassavaBase) return
 * each registered synonym repeated 11× per germplasm record — the bloat is
 * purely upstream and carries no information beyond the unique set.
 *
 * Identity is taken via a stable serialization (keys sorted recursively) so
 * that two entries with the same fields but different property insertion
 * order are recognized as duplicates — observed live: CassavaBase emits two
 * key orderings on the same record. Two entries that genuinely differ on
 * any field (e.g. same `synonym` text but different `type`) still hash to
 * distinct keys and are both kept. When no duplicates are present the
 * input row is returned by reference — the helper is allocation-free on
 * the common case.
 */
export declare function dedupSynonymsByIdentity>(row: T): T;
/** Return the input as a non-empty string array, or undefined. */
export declare function asStringArray(value: unknown): string[] | undefined;
/**
 * Axis interpretation for GeoJSON Point coordinates:
 * - `spec` — RFC 7946 standard `[lon, lat, alt?]` (default)
 * - `swapped` — non-conformant `[lat, lon, alt?]` deployments (e.g. the
 *   BrAPI Community Test Server). `find_locations` falls back to this
 *   reading when a bbox filter under spec ordering returns zero matches.
 */
export type CoordinateAxisOrder = 'spec' | 'swapped';
/**
 * Extract WGS84 coordinates from a BrAPI v2 record. Modern servers carry
 * coordinates as a GeoJSON Feature
 * (`coordinates.geometry.coordinates = [lon, lat, alt?]`); some legacy and
 * mixed-mode servers also expose top-level `latitude`/`longitude`/`altitude`.
 * Returns `undefined` only when both shapes are missing or malformed. Accepts
 * `unknown` so callers can pass Zod-passthrough rows without an explicit cast.
 *
 * `axisOrder` controls how a GeoJSON `Point.coordinates` array is read.
 * Legacy top-level `latitude`/`longitude` fields are unambiguous by name
 * and are not affected.
 */
export declare function extractCoordinates(record: unknown, axisOrder?: CoordinateAxisOrder): { latitude: number; longitude: number; altitude?: number; } | undefined;
/**
 * True when a BrAPI record carries a GeoJSON `Point` geometry with a
 * 2- or 3-element numeric coordinate array. Used by the bbox swap-on-zero
 * heuristic to decide whether retrying with axes swapped is worth doing —
 * Polygon-only or geometry-less rows can't be reinterpreted.
 */
export declare function hasPointGeometry(record: unknown): boolean;
/**
 * One filter → distribution mapping for `checkFilterMatchRates`. Used to
 * detect upstream servers that silently ignore a filter and return the
 * unfiltered set instead of the requested subset.
 */
export interface FilterMatchCheck {
    /** Compare case-insensitively (default: false). */
    caseInsensitive?: boolean;
    /** Distribution computed from the returned rows for the corresponding field. */
    distribution: Record;
    /** User-facing filter name (e.g. "seasons"). Surfaces in the warning. */
    paramName: string;
    /** Requested values from the agent. Undefined or empty means skip this check. */
    requestedValues: readonly (string | number | boolean)[] | undefined;
    /**
     * Warn when any returned row carries a value outside the requested set.
     * Equality filters should usually set this because one matching row is not
     * enough evidence that the upstream honored the filter.
     */
    requireEveryRowMatch?: boolean;
}
/**
 * Verify that requested filter values appear in the returned distributions.
 * When the upstream silently drops a filter, all requested values miss the
 * distribution — the warning lets the agent (and the user) know the result
 * set may not actually match the query. Skips checks where the distribution
 * is empty (no signal) or where no values were requested. Findings are
 * reported by pushing onto `warnings`; nothing is returned.
 */
export declare function checkFilterMatchRates(warnings: string[], fullRowCount: number, checks: readonly FilterMatchCheck[]): void;
/**
 * Build a `FilterMatchCheck` for an FK identifier filter — the user's input
 * values are compared against a fresh distribution computed over `fieldName`
 * on the returned rows. Surfaces silently-ignored FK filters as warnings
 * without polluting the public `result.distributions` shape with raw DbId
 * frequencies (which carry no semantic meaning to the agent).
 */
export declare function fkMatchCheck(paramName: string, requestedValues: readonly (string | number | boolean)[] | undefined, rows: readonly Record[], fieldName: string, options?: { requireEveryRowMatch?: boolean; }): FilterMatchCheck;
/**
 * Generate `FilterMatchCheck` entries for every key in `extraFilters` that
 * can be cross-referenced against a top-level column on the returned rows.
 * Catches the wrong-results class of bug where the agent passes a filter the
 * upstream silently ignores (e.g. `locationName` on `/studies` — not a valid
 * filter key, server returns the unfiltered baseline). The named-param
 * verification path treats requested values as authoritative; this helper
 * extends that same check across `extraFilters` so the post-hoc validator
 * runs uniformly regardless of how the filter entered the call.
 *
 * Column inference is intentionally narrow — exact match, then a trailing-`s`
 * strip (`locationDbIds → locationDbId`, `locationNames → locationName`).
 * Keys that don't resolve to a column produce a single grouped warning
 * ("could not verify these extraFilters keys: …") so the agent knows the
 * trace can't speak to whether they were honored.
 */
export declare function buildExtraFilterChecks(extraFilters: Record | undefined, rows: readonly Record[], warnings: string[]): FilterMatchCheck[];
/** Options for {@link buildRefinementHint}. */
export interface RefinementHintOptions {
    /**
     * Filter parameter names available on the calling tool. Used to suggest
     * concrete narrowers when no distribution has enough cardinality to
     * pick a specific value. Skipped when omitted.
     */
    availableFilters?: readonly string[];
}
/**
 * Compose a refinement hint for a too-large result set. Picks the
 * highest-cardinality non-empty distribution to suggest as a narrower.
 * Returns undefined when the result set fits under `loadLimit`. When
 * distributions are too sparse to surface a specific value, falls back to
 * suggesting the tool's available filter parameters by name.
 */
export declare function buildRefinementHint(totalCount: number, loadLimit: number, distributions: Record>, options?: RefinementHintOptions): string | undefined;
/**
 * Collect `key=value` strings for every top-level key in a passthrough row
 * that was not explicitly rendered by the caller. Ensures format() /
 * structuredContent parity — server fields beyond the declared schema are
 * still emitted to text-only clients (Claude Desktop sees content[] only).
 * Large nested objects (over `MAX_INLINE_JSON` chars when stringified) collapse
 * to a size-aware placeholder; the full payload remains in `structuredContent`.
 */
export declare function collectPassthroughParts(row: Record, renderedKeys: ReadonlySet): string[];
/**
 * Append `- **key:** value` lines for every top-level key in a passthrough
 * record that was not explicitly rendered. Companion to
 * `collectPassthroughParts` for detail-view (get_*) tools that use a
 * line-per-field layout instead of bullet-part lists. Honors the same inline
 * JSON cap. Appends to the caller's `lines` array in place.
 */
export declare function appendPassthroughLines(lines: string[], record: Record, renderedKeys: ReadonlySet): void;
export type { BrapiEnvelope, BrapiPagination, ResolvedAuth, ServerConfig };
//# sourceMappingURL=find-helpers.d.ts.map