import { type AbortSignal } from "bare-abort-controller";
import type { RequestContext, RequestKind } from "./request-context";
import type { Logger } from "../../../logging/types";
/**
 * Terminal outcome a caller reports when finishing a request via
 * `registry.end(...)`. The registry translates it into a terminal
 * `RequestState` before the scope is disposed, so observers see a
 * coherent final state.
 */
export type RequestOutcome =
    | "completed"
    | "failed"
    | "cancelled";
/**
 * Options accepted by `RequestRegistry.begin(...)` to open a new request.
 */
export interface BeginOpts {
    /** Stable identity. Caller-provided so the client and server agree. */
    requestId: string;
    /**
     * Kind of the request. Concurrency policies are registered per kind,
     * so this selects which policy (if any) applies at admission.
     */
    kind: RequestKind;
    /**
     * Model the request targets, if any. Requests without a `modelId` are
     * never gated by a concurrency policy.
     */
    modelId?: string;
    /**
     * Optional parent abort signal — typically the worker-level "shutdown"
     * signal. When the parent aborts, the request's own signal aborts too.
     * Composes through an `addEventListener("abort", ...)` hook so cancelling
     * the parent does not require iterating the registry.
     */
    parentSignal?: AbortSignal;
}
/**
 * Cancel target that addresses a request by its `requestId`.
 */
export interface CancelByRequestId {
    /** Id of the request to cancel (the id passed to `begin(...)`). */
    requestId: string;
    /**
     * Optional free-form cancellation reason.
     * NOTE(review): presumably forwarded as the abort reason — confirm
     * against the implementation.
     */
    reason?: string;
}
/**
 * Broad cancel target: matches every active request for `modelId`,
 * optionally narrowed to a single `kind`.
 */
export interface CancelByModelId {
    /** Model whose active requests should be cancelled. */
    modelId: string;
    /** When set, only requests of this kind are matched. */
    kind?: RequestKind;
    /**
     * Optional free-form cancellation reason.
     * NOTE(review): presumably forwarded as the abort reason — confirm
     * against the implementation.
     */
    reason?: string;
}
/**
 * Argument to `RequestRegistry.cancel(...)`: either a single request by id
 * or every request matching a model (and optional kind). The union has no
 * discriminant tag; variants are told apart by which id field is present.
 */
export type CancelTarget = CancelByRequestId | CancelByModelId;
/**
 * Per-kind admission rule. Kinds without a registered policy have no
 * admission control (every `begin(...)` is accepted as long as the
 * request id is unique).
 *
 * The policy turns admission into a per-`(kind, modelId)` FIFO queue with
 * a configurable concurrency limit. Once a finite `maxConcurrentPerModel`
 * is set, a request that arrives while the `(kind, modelId)` is at
 * capacity *waits its turn* (the default `onOverflow: "queue"`) rather
 * than colliding on the single native llama.cpp context. With the default
 * limit of `Infinity` nothing is ever queued. The limit is
 * forward-compatible with addon-side continuous batching: bump
 * `maxConcurrentPerModel` to the addon's slot count to flip serial
 * execution to N-way concurrent without any further SDK change.
 *
 * Requests without a `modelId` are never gated (there is no per-model
 * identity to serialize against).
 */
export interface ConcurrencyPolicy {
    /** Request kind this policy governs. One policy per kind. */
    kind: RequestKind;
    /**
     * Max simultaneously in-flight requests per `(kind, modelId)`.
     * Default: `Infinity` (no admission limit). Set `1` to serialize, or
     * `N` for N-way concurrency (the future continuous-batching slot
     * count). Non-finite values disable gating entirely.
     */
    maxConcurrentPerModel?: number;
    /**
     * What a `begin(...)` does when the `(kind, modelId)` is at capacity:
     *  - `"queue"` (default): wait FIFO for a slot to free.
     *  - `"reject"`: throw `RequestRejectedByPolicyError` immediately
     *    (the legacy `oneAtATimePerModel` behavior).
     */
    onOverflow?: "queue" | "reject";
    /**
     * Max waiters allowed to queue per `(kind, modelId)` before further
     * begins reject with `RequestRejectedByPolicyError`. Bounds memory so a
     * runaway client can't grow the queue without limit. Default: `64`.
     * Only consulted when `onOverflow` is `"queue"`.
     */
    maxQueueDepthPerModel?: number;
    /**
     * Reject a waiter that has waited longer than this many milliseconds
     * with `RequestRejectedByPolicyError`. Default: `undefined` (wait
     * indefinitely for a slot).
     */
    queueTimeoutMs?: number;
    /**
     * @deprecated Back-compat alias. `true` normalizes to
     * `{ maxConcurrentPerModel: 1, onOverflow: "reject" }`; `false` (or
     * omitted) leaves admission unlimited. Ignored when
     * `maxConcurrentPerModel` is set explicitly.
     */
    oneAtATimePerModel?: boolean;
}
/**
 * `ManagedRequestContext` is the value `begin(...)` resolves to. It extends
 * `RequestContext` with an async-dispose method so handlers can write:
 *
 *   await using ctx = await registry.begin({ ... });
 *
 * On dispose the scope unwinds (LIFO cleanup) and the registry slot is
 * freed. If the handler doesn't override `ctx.state` before unwinding,
 * the registry derives the terminal state from `signal.aborted` —
 * `"cancelled"` when an abort was recorded, `"completed"` otherwise.
 */
export interface ManagedRequestContext extends RequestContext {
    /**
     * Async disposer invoked by `await using` when the binding goes out of
     * scope: unwinds the request scope and frees the registry slot.
     */
    [Symbol.asyncDispose](): Promise<void>;
}
/**
 * Tracks in-flight requests: admission (`begin` / `policy`), lookup
 * (`get` / `list`), cancellation (`cancel` / `cancelAll`) and explicit
 * termination (`end`).
 */
export interface RequestRegistry {
    /**
     * Open a new request. Returns a promise because admission may *queue*:
     * when a concurrency policy caps the `(kind, modelId)` and it's at
     * capacity with `onOverflow: "queue"`, the promise resolves once a slot
     * frees (FIFO). Callers write `await using ctx = await registry.begin(...)`.
     *
     * Rejects with:
     *  - `RequestIdConflictError` if `requestId` is already present
     *    (UUIDv4 collision is astronomically unlikely; the guard exists
     *    so a buggy client retry sending the same id can't silently
     *    overwrite an in-flight request).
     *  - `RequestRejectedByPolicyError` if a concurrency policy was
     *    registered for `opts.kind` and the new request can't be admitted:
     *    `onOverflow: "reject"` at capacity, the queue depth cap is
     *    exceeded, or the waiter's `queueTimeoutMs` elapsed.
     */
    begin(opts: BeginOpts): Promise<ManagedRequestContext>;
    /**
     * Register or replace the concurrency policy for a `RequestKind`.
     * Subsequent `begin(...)` calls for that kind run the policy before
     * allocating a controller / scope. One policy per kind — calling
     * twice replaces the previous declaration.
     *
     * @example
     *   r.policy({ kind: "completion", maxConcurrentPerModel: 1, onOverflow: "reject" });
     *   await using a = await r.begin({ requestId: "r-1", kind: "completion", modelId: "m1" });
     *   await r.begin({ requestId: "r-2", kind: "completion", modelId: "m1" });
     *   // → rejects with RequestRejectedByPolicyError (code 52420)
     */
    policy(opts: ConcurrencyPolicy): void;
    /**
     * Look up an in-flight request by id. Returns `null` rather than
     * throwing when there is no match (presumably: no request with that id
     * is currently tracked — confirm against the implementation).
     */
    get(requestId: string): RequestContext | null;
    /**
     * Snapshot of currently-tracked requests. Useful for diagnostics /
     * structured logs ("which requests are in flight right now?"). Returns
     * a fresh array; mutations on it are not observed by the registry.
     */
    list(): RequestContext[];
    /**
     * Cancel matching requests. Returns the number of contexts whose abort
     * was triggered by *this* call (already-cancelled contexts are skipped
     * so callers can rely on the count to log "n requests cancelled" once).
     *
     * For `{ requestId }`, targets the request with that id (presumably a
     * count of `0` when the id is unknown — confirm).
     *
     * For `{ modelId }` and an optional `kind`, cancels every active
     * request that matches the predicate. This is the broad-cancel path
     * the pre-registry `cancel({ modelId })` API maps to.
     */
    cancel(target: CancelTarget): number;
    /**
     * Cancel every active request — the worker-shutdown / model-unload
     * sweep. The reason is forwarded to each request as the abort reason
     * so handler logs can distinguish a normal cancel from a sweep.
     * Resolves once all targeted contexts have flipped to `"cancelling"`;
     * scope unwinding still happens on each handler's own dispose path.
     */
    cancelAll(reason: "shutdown" | "modelUnload"): Promise<void>;
    /**
     * Mark a request finished and dispose its scope. Equivalent to
     * `await ctx[Symbol.asyncDispose]()` with an explicit outcome.
     * Idempotent — calling `end` after a scope dispose is a no-op.
     */
    end(requestId: string, outcome: RequestOutcome): Promise<void>;
}
/**
 * Create a new `RequestRegistry`.
 *
 * @param options Optional construction settings; may be omitted entirely.
 * @returns A registry instance implementing `RequestRegistry`.
 */
export declare function createRequestRegistry(options?: {
    /** Defaults to `getServerLogger()`. Tests inject a stub. */
    logger?: Logger;
}): RequestRegistry;
/**
 * Test-only knobs exported for `request-registry.test.ts` so the bound
 * assertions can pin the documented limits without re-reading them via
 * fragile string comparison. **Not part of the public SDK surface.**
 *
 * @internal
 */
export declare const __requestRegistryTestHooks: {
    /**
     * NOTE(review): by its name, an entry cap for cancels recorded before
     * the matching `begin(...)` arrives — confirm against the implementation.
     */
    cancelBeforeBeginMaxEntries: number;
    /**
     * NOTE(review): by its name, a time-to-live in milliseconds for those
     * cancel-before-begin entries — confirm against the implementation.
     */
    cancelBeforeBeginTtlMs: number;
    /**
     * Presumably the default for `ConcurrencyPolicy.maxQueueDepthPerModel`
     * (documented there as `64`) — confirm.
     */
    defaultMaxQueueDepthPerModel: number;
};
//# sourceMappingURL=request-registry.d.ts.map