/// <reference types="@webgpu/types" />
import { HasReactive } from "@reactively/decorate";
import { Cache, ComposableShader, ValueOrFn } from "../util/Util.js";
import { WorkgroupScan } from "./WorkgroupScan.js";
import { BinOpModule } from "../util/BinOpModules.js";
/** Parameters to construct a {@link PrefixScan} instance.  */
export interface PrefixScanArgs {
    /** The `GPUDevice` handed to the {@link PrefixScan} being constructed. */
    device: GPUDevice;
    /**
     * Source data to be scanned.
     *
     * Either a `GPUBuffer` or a function returning one. A function returning
     * the source buffer will be executed lazily, and reexecuted if the
     * function's `@reactively` source values change.
     */
    source: ValueOrFn<GPUBuffer>;
    /** {@inheritDoc PrefixScan#binOps} */
    binOps?: BinOpModule;
    /** {@inheritDoc PrefixScan#exclusive} */
    exclusive?: boolean;
    /** {@inheritDoc PrefixScan#initialValue} */
    initialValue?: number;
    /** {@inheritDoc PrefixScan#label} */
    label?: string;
    /** {@inheritDoc PrefixScan#forceWorkgroupLength} */
    forceWorkgroupLength?: number;
    /** {@inheritDoc PrefixScan#maxWorkgroups} */
    maxWorkgroups?: number;
    /**
     * Optional factory that returns a {@link Cache}, used for caching
     * GPUComputePipeline objects.
     *
     * Note that this is a function producing a cache, not a cache instance.
     */
    pipelineCache?: <T extends object>() => Cache<T>;
}
/**
 * A cascade of shaders to do a prefix scan operation, based on a shader that
 * does a prefix scan of a workgroup sized chunk of data (e.g. 64 or 256 elements).
 *
 * The scan operation is parameterized by the module mechanism. The user can
 * instantiate a PrefixScan with sum to get prefix-sum, or use another module for
 * other parallel scan applications.
 *
 * For small data sets that fit in workgroup, only a single shader pass is needed.
 * For larger data sets, a sequence of shaders is orchestrated as follows:
 *
 *   1. One shader does a prefix scan on each workgroup sized chunk of data.
 *     It emits a partial prefix sum for each workgroup and a single block-level sum from each workgroup.
 *   2. Another instance of the same shader does a prefix scan on the block sums from the previous shader.
 *     The end result is a set of block level prefix sums
 *   3. A final shader sums the block prefix sums back with the partial prefix sums
 *
 * For very large data sets, steps 2 and 3 repeat hierarchically.
 * Each level of summing reduces the data set by a factor of the workgroup size.
 * So three levels handles e.g. 16M elements (256 ** 3) if the workgroup size is 256.
 *
 * @typeParam T - Type of elements returned from the scan
 */
export declare class PrefixScan<T = number> extends HasReactive implements ComposableShader {
    /** customize the type of scan (e.g. prefix sum on 32 bit floats) */
    binOps: BinOpModule;
    /** Source data to be scanned */
    source: GPUBuffer;
    /** Debug label attached to gpu objects for error reporting */
    label?: string;
    /**
     * Override to set compute workgroup size e.g. for testing.
     * @defaultValue max workgroup size of the `GPUDevice`
     */
    forceWorkgroupLength?: number;
    /**
     * Override to set max number of workgroups for dispatch e.g. for testing.
     * @defaultValue maxComputeWorkgroupsPerDimension from the `GPUDevice`
     */
    maxWorkgroups?: number;
    /** Inclusive scan accumulates a binary operation across all source elements.
     * Exclusive scan accumulates a binary operation across source elements, using initialValue
     * as the first element and stopping before the final source element.
     *
     * @defaultValue false (inclusive scan).
     */
    exclusive: boolean;
    /** Initial value for exclusive scan
     * @defaultValue 0
     */
    initialValue?: number;
    /*
     * NOTE(review): two doc comments used to sit here describing a start index
     * (0 if undefined) and an exclusive end index (src.length if undefined) of
     * the range to scan. No such properties are declared on this class, so the
     * comments were being picked up as documentation for `device`. Confirm
     * whether range support is planned or was removed.
     */
    /** The `GPUDevice` for this scanner (see {@link PrefixScanArgs.device}). */
    device: GPUDevice;
    private usageContext;
    /** cache for GPUComputePipeline or GPURenderPipeline */
    private pipelineCache?;
    /** Create a new scanner
     * @param args - construction options, see {@link PrefixScanArgs}
     */
    constructor(args: PrefixScanArgs);
    /**
     * Entry point required by {@link ComposableShader}.
     *
     * Presumably records the scan's GPU work into the supplied encoder without
     * submitting it — the implementation is not visible here; confirm.
     * @param commandEncoder - encoder that receives the commands
     */
    commands(commandEncoder: GPUCommandEncoder): void;
    /** Release the scanResult buffer for destruction. */
    destroy(): void;
    /** Execute the prefix scan immediately and copy the results back to the CPU.
     * (results are copied from the {@link PrefixScan.result} GPUBuffer)
     *
     * NOTE(review): the return type is `number[]` even though the class is
     * generic over `T` — confirm whether `T[]` was intended.
     * @returns the scanned result in an array
     */
    scan(): Promise<number[]>;
    /** Buffer containing results of the scan after the shader has run. */
    get result(): GPUBuffer;
    private get shaders();
    /** @internal */
    get _sourceScan(): WorkgroupScan;
    /**
     * Shaders to scan intermediate block sums.
     * Multiple levels of scanning may be required for large sums.
     * @internal
     */
    get _blockScans(): WorkgroupScan[];
    private get sourceSize();
    private get fitsInWorkGroup();
    private get workgroupLength();
    /** shader passes to apply block level sums to prefixes within the block */
    private get applyScans();
}
/**
 * TBD:
 *  . generator for one workgroup size? - I don't understand this one fully.
 *  . support for a debug error context
 *  . sharing bind groups? - no proposal here
 */