///
import { HasReactive } from "@reactively/decorate";
import { Cache, ComposableShader, ValueOrFn } from "../util/Util.js";
import { WorkgroupScan } from "./WorkgroupScan.js";
import { BinOpModule } from "../util/BinOpModules.js";
/**
* Parameters to construct a {@link PrefixScan} instance.
*
* Only `device` and `source` are required; every other field is optional.
*/
export interface PrefixScanArgs {
/** GPU device for the scan. */
device: GPUDevice;
/**
* Source data to be scanned.
*
* A function returning the source buffer will be executed lazily,
* and reexecuted if the function's `@reactively` source values change.
*
* NOTE(review): `ValueOrFn` appears here without a type argument; presumably
* it wraps a `GPUBuffer` (see {@link PrefixScan#source}) - confirm against
* the declaration in `../util/Util.js`.
*/
source: ValueOrFn;
/** {@inheritDoc PrefixScan#binOps} */
binOps?: BinOpModule;
/** {@inheritDoc PrefixScan#exclusive} */
exclusive?: boolean;
/** {@inheritDoc PrefixScan#initialValue} */
initialValue?: number;
/** {@inheritDoc PrefixScan#label} */
label?: string;
/** {@inheritDoc PrefixScan#forceWorkgroupLength} */
forceWorkgroupLength?: number;
/** {@inheritDoc PrefixScan#maxWorkgroups} */
maxWorkgroups?: number;
/**
* Function returning the cache used for GPUComputePipeline objects.
*
* NOTE(review): `Cache` is referenced without a type argument - confirm
* against the declaration in `../util/Util.js`.
*/
pipelineCache?: () => Cache;
}
/**
 * A cascade of shaders to do a prefix scan operation, based on a shader that
 * does a prefix scan of a workgroup sized chunk of data (e.g. 64 or 256 elements).
 *
 * The scan operation is parameterized by the module mechanism. The user can
 * instantiate a PrefixScan with sum to get prefix-sum, or use another module for
 * other parallel scan applications.
 *
 * For small data sets that fit in a workgroup, only a single shader pass is needed.
 * For larger data sets, a sequence of shaders is orchestrated as follows:
 *
 * 1. One shader does a prefix scan on each workgroup sized chunk of data.
 *    It emits a partial prefix sum for each workgroup and a single block level
 *    sum from each workgroup.
 * 2. Another instance of the same shader does a prefix scan on the block sums
 *    from the previous shader. The end result is a set of block level prefix sums.
 * 3. A final shader sums the block prefix sums back with the partial prefix sums.
 *
 * For very large data sets, steps 2 and 3 repeat hierarchically.
 * Each level of summing reduces the data set by a factor of the workgroup size.
 * So three levels handles e.g. 16M elements (256 ** 3) if the workgroup size is 256.
 */
export declare class PrefixScan extends HasReactive implements ComposableShader {
  /** customize the type of scan (e.g. prefix sum on 32 bit floats) */
  binOps: BinOpModule;
  /** Source data to be scanned */
  source: GPUBuffer;
  /** Debug label attached to gpu objects for error reporting */
  label?: string;
  /** Override to set compute workgroup size e.g. for testing.
   * @defaultValue max workgroup size of the `GPUDevice`
   */
  forceWorkgroupLength?: number;
  /** Override to set max number of workgroups for dispatch e.g. for testing.
   * @defaultValue maxComputeWorkgroupsPerDimension from the `GPUDevice`
   */
  maxWorkgroups?: number;
  /** Inclusive scan accumulates a binary operation across all source elements.
   * Exclusive scan accumulates a binary operation across source elements, using initialValue
   * as the first element and stopping before the final source element.
   *
   * @defaultValue false (inclusive scan).
   */
  exclusive: boolean;
  /** Initial value for exclusive scan
   * @defaultValue 0
   */
  initialValue?: number;
  /** GPU device for the scan. */
  device: GPUDevice;
  private usageContext;
  /** cache for GPUComputePipeline or GPURenderPipeline */
  private pipelineCache?;
  /** Create a new scanner
   * @param args - construction parameters, see {@link PrefixScanArgs}
   */
  constructor(args: PrefixScanArgs);
  /**
   * {@link ComposableShader} entry point.
   *
   * NOTE(review): presumably records the scan's shader passes into
   * `commandEncoder` - confirm against the implementation.
   *
   * @param commandEncoder - encoder that receives the commands
   */
  commands(commandEncoder: GPUCommandEncoder): void;
  /** Release the scanResult buffer for destruction. */
  destroy(): void;
  /** Execute the prefix scan immediately and copy the results back to the CPU.
   * (results are copied from the {@link PrefixScan.result} GPUBuffer)
   * @returns a promise resolving to the scanned result as an array of numbers
   */
  scan(): Promise<number[]>;
  /** Buffer containing results of the scan after the shader has run. */
  get result(): GPUBuffer;
  private get shaders();
  /** @internal */
  get _sourceScan(): WorkgroupScan;
  /**
   * Shaders to scan intermediate block sums.
   * Multiple levels of scanning may be required for large sums.
   * @internal
   */
  get _blockScans(): WorkgroupScan[];
  private get sourceSize();
  private get fitsInWorkGroup();
  private get workgroupLength();
  /** shader passes to apply block level sums to prefixes within the block */
  private get applyScans();
}
/**
* TBD:
* . generator for one workgroup size? - I don't understand this one fully.
* . support for a debug error context
* . sharing bind groups? - no proposal here
*/