// NOTE(review): this file originally began with a bare `///`, which looks like a
// triple-slash directive whose body was lost during extraction. Restore the
// directive from the build output if one is required.
// NOTE(review): generic type arguments also appear to have been stripped from this
// declaration file. The ones restored below are inferred from the accompanying doc
// comments and should be confirmed against the implementation.
import { HasReactive } from "@reactively/decorate";
import { Cache, ComposableShader, ValueOrFn } from "../util/Util.js";
import { WorkgroupScan } from "./WorkgroupScan.js";
import { BinOpModule } from "../util/BinOpModules.js";

/** Parameters to construct a {@link PrefixScan} instance. */
export interface PrefixScanArgs {
  /** GPU device used by the scan. */
  device: GPUDevice;

  /**
   * Source data to be scanned.
   *
   * A function returning the source buffer will be executed lazily,
   * and reexecuted if the function's `@reactively` source values change.
   */
  source: ValueOrFn<GPUBuffer>;

  /** {@inheritDoc PrefixScan#binOps} */
  binOps?: BinOpModule;

  /** {@inheritDoc PrefixScan#exclusive} */
  exclusive?: boolean;

  /** {@inheritDoc PrefixScan#initialValue} */
  initialValue?: number;

  /** {@inheritDoc PrefixScan#label} */
  label?: string;

  /** {@inheritDoc PrefixScan#forceWorkgroupLength} */
  forceWorkgroupLength?: number;

  /** {@inheritDoc PrefixScan#maxWorkgroups} */
  maxWorkgroups?: number;

  /**
   * cache for GPUComputePipeline
   *
   * NOTE(review): `Cache` may take a type argument that was lost in extraction — confirm.
   */
  pipelineCache?: () => Cache;
}

/**
 * A cascade of shaders to do a prefix scan operation, based on a shader that
 * does a prefix scan of a workgroup sized chunk of data (e.g. 64 or 256 elements).
 *
 * The scan operation is parameterized by the module mechanism. The user can
 * instantiate a PrefixScan with sum to get prefix-sum, or use another module for
 * other parallel scan applications.
 *
 * For small data sets that fit in a workgroup, only a single shader pass is needed.
 * For larger data sets, a sequence of shaders is orchestrated as follows:
 *
 * 1. One shader does a prefix scan on each workgroup sized chunk of data.
 *    It emits a partial prefix sum for each workgroup and a single block level sum
 *    from each workgroup.
 * 2. Another instance of the same shader does a prefix scan on the block sums from
 *    the previous shader. The end result is a set of block level prefix sums.
 * 3. A final shader sums the block prefix sums back with the partial prefix sums.
 *
 * For very large data sets, steps 2 and 3 repeat hierarchically.
 * Each level of summing reduces the data set by a factor of the workgroup size.
 * So three levels handles e.g. 16M elements (256 ** 3) if the workgroup size is 256.
 *
 * @typeParam T - Type of elements returned from the scan
 *   (NOTE(review): the `number` default is inferred from `initialValue` — confirm.)
 */
export declare class PrefixScan<T = number>
  extends HasReactive
  implements ComposableShader
{
  /** customize the type of scan (e.g. prefix sum on 32 bit floats) */
  binOps: BinOpModule;

  /** Source data to be scanned */
  source: GPUBuffer;

  /** Debug label attached to gpu objects for error reporting */
  label?: string;

  /**
   * Override to set compute workgroup size e.g. for testing.
   * @defaultValue max workgroup size of the `GPUDevice`
   */
  forceWorkgroupLength?: number;

  /**
   * Override to set max number of workgroups for dispatch e.g. for testing.
   * @defaultValue maxComputeWorkgroupsPerDimension from the `GPUDevice`
   */
  maxWorkgroups?: number;

  /**
   * Inclusive scan accumulates a binary operation across all source elements.
   * Exclusive scan accumulates a binary operation across source elements, using initialValue
   * as the first element and stopping before the final source element.
   *
   * @defaultValue false (inclusive scan).
   */
  exclusive: boolean;

  /**
   * Initial value for exclusive scan
   * @defaultValue 0
   */
  initialValue?: number;

  // The two notes below describe members that are not declared in this file.
  // They are kept as plain comments so they do not attach to `device` as its
  // documentation.
  //   - start index in src buffer of range to scan (0 if undefined)
  //   - end index (exclusive) in src buffer (src.length if undefined)

  /** GPU device used by the scan. */
  device: GPUDevice;

  private usageContext;

  /** cache for GPUComputePipeline or GPURenderPipeline */
  private pipelineCache?;

  /**
   * Create a new scanner
   * @param args - construction parameters, see {@link PrefixScanArgs}
   */
  constructor(args: PrefixScanArgs);

  /**
   * Takes the command encoder for the scan's GPU work.
   * @param commandEncoder - encoder passed in by the caller
   */
  commands(commandEncoder: GPUCommandEncoder): void;

  /** Release the scanResult buffer for destruction. */
  destroy(): void;

  /**
   * Execute the prefix scan immediately and copy the results back to the CPU.
   * (results are copied from the {@link PrefixScan.result} GPUBuffer)
   *
   * NOTE(review): the `number[]` element type is restored from context — confirm.
   * @returns the scanned result in an array
   */
  scan(): Promise<number[]>;

  /** Buffer containing results of the scan after the shader has run. */
  get result(): GPUBuffer;

  private get shaders();

  /** @internal */
  get _sourceScan(): WorkgroupScan;

  /**
   * Shaders to scan intermediate block sums.
   * Multiple levels of scanning may be required for large sums.
   * @internal
   */
  get _blockScans(): WorkgroupScan[];

  private get sourceSize();

  private get fitsInWorkGroup();

  private get workgroupLength();

  /** shader passes to apply block level sums to prefixes within the block */
  private get applyScans();
}

/**
 * TBD:
 * . generator for one workgroup size? - I don't understand this one fully.
 * . support for a debug error context
 * . sharing bind groups? - no proposal here
 */