import { GraphicsDevice } from 'playcanvas';
import { type Projection } from '../render/camera';
/**
 * Configuration for a `GpuSplatRasterizer`. Fixed across the lifetime of
 * a render — `numSHBands` and the group tile dimensions determine GPU
 * buffer sizes and shader uniform layouts.
 *
 * Sizes are expressed as a "group" tile rectangle (`groupTilesX ×
 * groupTilesY`). For a single-pass render the group covers the whole
 * image, so the buffers are exactly image-sized. The group abstraction
 * is retained as a hook for future subframe splitting (each subframe is
 * an independent group sharing the global depth sort) — the project
 * shader's group-AABB cull and group-pixel-origin uniforms still
 * exercise this code path.
 *
 * Every field is declared as required: this type carries no optional
 * members, so callers must supply the camera, image and background
 * values explicitly.
 */
interface SplatRasterizerOptions {
    /**
     * Number of SH (spherical-harmonic) bands above DC (0–3). Determines
     * the input stride, i.e. how many floats each gaussian occupies in
     * the input buffer.
     */
    numSHBands: 0 | 1 | 2 | 3;
    /**
     * Camera projection mode. Specializes the project, emit-pairs and
     * rasterize-binned shaders. `pinhole` uses the classical
     * perspective + EWA Jacobian path; `equirect` uses spherical
     * (atan2/asin) screen mapping, a non-linear Jacobian, radial view
     * depth, and tile-bin / rasterize paths that wrap the X axis at the
     * ±π longitude seam.
     *
     * NOTE(review): `pinhole` has been described as the default mode,
     * but this field is required here, so any defaulting must happen in
     * the caller before the options object is built — confirm.
     */
    projection: Projection;
    /** Tiles per group along X (≤ imageTilesX). Sizes runningState/output. */
    groupTilesX: number;
    /** Tiles per group along Y (≤ imageTilesY). Sizes runningState/output. */
    groupTilesY: number;
    /** Max gaussians per chunk; sizes the input + projection + pair buffers. */
    chunkCap: number;
    /**
     * Hard upper bound on per-splat tile coverage. The project shader
     * clamps `coverage[i] = min(rawBboxArea, maxCoveragePerSplat)`, so
     * the pair buffer is bounded by `chunkCap × maxCoveragePerSplat`
     * regardless of scene/screen size. If the cap ever bites, the
     * emit-pairs shader walks the bbox row-major and stops once it
     * has written `coverage[i]` pairs — i.e. the tiles that come last in
     * row-major order (towards the bbox's bottom-right corner) are
     * dropped.
     *
     * The orchestrator sets this to the group's full tile area so the
     * clamp is geometrically unreachable (any in-group bbox ≤ group
     * area ≤ cap), making truncation a non-issue in practice. The cap
     * is retained as a defensive ceiling on the pair buffer.
     */
    maxCoveragePerSplat: number;
    /** Output image width in pixels (constant per render). */
    imageWidth: number;
    /** Output image height in pixels (constant per render). */
    imageHeight: number;
    /** Near plane distance in world units. */
    near: number;
    /**
     * Camera basis: rows are (right, down, forward) of the world→camera
     * rotation. This is the X component of the `right` row.
     */
    rightX: number;
    /** Y component of the camera basis `right` row. */
    rightY: number;
    /** Z component of the camera basis `right` row. */
    rightZ: number;
    /** X component of the camera basis `down` row. */
    downX: number;
    /** Y component of the camera basis `down` row. */
    downY: number;
    /** Z component of the camera basis `down` row. */
    downZ: number;
    /** X component of the camera basis `forward` row. */
    forwardX: number;
    /** Y component of the camera basis `forward` row. */
    forwardY: number;
    /** Z component of the camera basis `forward` row. */
    forwardZ: number;
    /** Camera eye position in world space (X component). */
    eyeX: number;
    /** Camera eye position in world space (Y component). */
    eyeY: number;
    /** Camera eye position in world space (Z component). */
    eyeZ: number;
    /** Focal length along X, in pixel units. */
    focalX: number;
    /** Focal length along Y, in pixel units. */
    focalY: number;
    /**
     * Camera-space Z of the focus plane, world units. Pinhole-only;
     * unused when `projection === 'equirect'`.
     */
    focusDistance: number;
    /**
     * DoF (depth-of-field) strength as a pixel-space scalar: the CoC
     * (circle of confusion) radius in pixels when
     * `|1 − focusDistance/cz| = 1`. `0` disables defocus. The writer
     * derives this from `--f-stop` + `--sensor-size` using the thin-lens
     * CoC formula. Pinhole-only.
     */
    apertureScale: number;
    /** Background red channel, in [0, 1]. */
    bgR: number;
    /** Background green channel, in [0, 1]. */
    bgG: number;
    /** Background blue channel, in [0, 1]. */
    bgB: number;
    /** Background alpha channel, in [0, 1]. */
    bgA: number;
}
/**
 * GPU-accelerated splat rasterizer.
 *
 * Owns eight compute shaders — project, prefix-sum, emit-pairs,
 * prepare-indirect, init-tile-offsets, find-boundaries, rasterize-binned,
 * finalize-pack — a shared `ComputeRadixSort` (used in indirect mode,
 * key + value), and GPU buffers. The per-chunk pipeline is fully
 * GPU-resident: the caller never reads back coverage, sorted keys, or
 * tile offsets.
 *
 * Per-render flow:
 *   1. `beginGroup(...)` — clears the running state and sets uniforms
 *      for this group (covers the whole image for a single-pass render).
 *   2. For each chunk of depth-sorted splats: `dispatchChunk(data,
 *      chunkSize)` runs the whole tile-bin + rasterize pipeline in one
 *      submission — project + coverage → prefix-sum (writes emitOffsets
 *      + totalPairs) → emit-pairs → prepare-indirect → radix sortIndirect
 *      → init-tile-offsets → find-boundaries → rasterize-binned. No
 *      readbacks; one `submit()` per chunk to capture each compute's
 *      uniform state before the next chunk overwrites it.
 *   3. `finishGroup()` — dispatches finalize-pack and starts an async
 *      readback. Returns a `Promise<Uint8Array>` resolved when the GPU has
 *      finished writing this group's RGBA bytes.
 *
 * This is an ambient declaration (`declare class`): only the shape of the
 * class is visible here. Private members are listed by name without
 * types, so the notes on them below are limited to what their names and
 * the surrounding documentation establish.
 */
declare class GpuSplatRasterizer {
    /** Presumably the `GraphicsDevice` passed to the constructor — confirm. */
    private device;
    /** Presumably the `SplatRasterizerOptions` passed to the constructor — confirm. */
    private options;
    // One compute shader per pipeline stage named in the class overview.
    /** Shader for the project (+ coverage) stage. */
    private projectShader;
    /** Shader for the prefix-sum stage. */
    private prefixSumShader;
    /** Shader for the emit-pairs stage. */
    private emitPairsShader;
    /** Shader for the prepare-indirect stage. */
    private prepareIndirectShader;
    /** Shader for the init-tile-offsets stage. */
    private initTileOffsetsShader;
    /** Shader for the find-boundaries stage. */
    private findBoundariesShader;
    /** Shader for the rasterize-binned stage. */
    private rasterizeBinnedShader;
    /** Shader for the finalize-pack stage. */
    private finalizeShader;
    // One `*BgFormat` per shader above. NOTE(review): "Bg" presumably
    // stands for bind group (i.e. the bind-group format each shader is
    // created with) — confirm against the implementation.
    private projectBgFormat;
    private prefixSumBgFormat;
    private emitPairsBgFormat;
    private prepareIndirectBgFormat;
    private initTileOffsetsBgFormat;
    private findBoundariesBgFormat;
    private rasterizeBinnedBgFormat;
    private finalizeBgFormat;
    /** Presumably the collection of GPU buffers owned by this instance — confirm. */
    private buffers;
    /**
     * Single shared `ComputeRadixSort` for the GPU tile-bin pipeline.
     * Used in key+value mode: tile-index keys + splat-index values.
     */
    private radixSort;
    /**
     * Number of key bits (`numBits`) passed to `sortIndirect`, derived
     * from numTiles and kept a multiple of 4.
     */
    private sortKeyBits;
    /**
     * NOTE(review): presumably the fill pattern used when `beginGroup`
     * clears the running state — confirm.
     */
    private clearStatePattern;
    /** Active group's tile count along X, set by `beginGroup`. */
    private activeTilesX;
    /** Active group's tile count along Y, set by `beginGroup`. */
    private activeTilesY;
    /** Floats per gaussian in the input buffer (depends on SH band count). */
    readonly inputStride: number;
    /** Group tile dimensions (X). */
    readonly groupTilesX: number;
    /** Group tile dimensions (Y). */
    readonly groupTilesY: number;
    /** Max gaussians per chunk. */
    readonly chunkCap: number;
    /**
     * Group extent in pixels along X. NOTE(review): presumably
     * `groupTilesX × 16`, given the 16-pixel tile edge implied by the
     * byte count documented on `finishGroup` — confirm.
     */
    readonly groupPixelW: number;
    /**
     * Group extent in pixels along Y. NOTE(review): presumably
     * `groupTilesY × 16` — confirm.
     */
    readonly groupPixelH: number;
    /**
     * Create a rasterizer bound to a graphics device.
     *
     * @param device - PlayCanvas graphics device the rasterizer runs on.
     * @param options - Render configuration; see `SplatRasterizerOptions`.
     * It is documented as fixed for the lifetime of a render because it
     * determines GPU buffer sizes and shader uniform layouts.
     */
    constructor(device: GraphicsDevice, options: SplatRasterizerOptions);
    /**
     * Apply the global (camera + image + background) uniforms to every
     * pipeline compute instance, plus the per-group origin/extent fields.
     *
     * The group abstraction is retained as a hook for future subframe
     * rendering — when a render is split into multiple groups, each call
     * sets the current group's pixel rectangle so the project shader's
     * AABB cull skips splats outside the group.
     *
     * @param groupX - Group index along X.
     * @param groupY - Group index along Y.
     * @param groupTilesX - Number of tiles in this group along X.
     * @param groupTilesY - Number of tiles in this group along Y.
     */
    private setUniforms;
    /**
     * Begin processing a group. Clears running state and sets uniforms.
     * Must precede the `dispatchChunk` calls for the group; `finishGroup`
     * sizes its work from the tile dimensions given here.
     *
     * @param groupX - Group index along X.
     * @param groupY - Group index along Y.
     * @param groupTilesX - Number of tiles in this group along X.
     * @param groupTilesY - Number of tiles in this group along Y.
     */
    beginGroup(groupX: number, groupY: number, groupTilesX: number, groupTilesY: number): void;
    /**
     * Commit pending GPU work. Called at chunk boundaries so each chunk's
     * uniform-buffer values are captured before the next chunk overwrites
     * them — a `Compute` instance's persistent uniform buffer is updated
     * by `setParameter`, and the dispatch only captures the value on
     * submit. Within a chunk, every dispatch uses a distinct `Compute`
     * instance, so no internal submits are needed.
     */
    submit(): void;
    /**
     * Reserve a fresh sort + find-boundaries slot pair in the device's
     * indirect-dispatch buffer for this chunk. The returned indices are
     * consumed internally by the per-chunk pipeline (`dispatchChunk`)
     * and exposed for cross-cutting use (e.g. the radix sort needs the
     * sort slot).
     *
     * @returns Two fresh slot indices in the device's indirect dispatch
     * buffer: one for the radix sort's indirect dispatch, one for the
     * find-boundaries indirect dispatch.
     */
    private acquireIndirectSlots;
    /**
     * Dispatch the entire per-chunk tile-bin + rasterize pipeline on the
     * GPU with zero CPU readbacks:
     *
     *   pack-and-upload → project + coverage → prefix-sum (writes
     *   emitOffsets + totalPairs) → emit-pairs (writes tileKeys +
     *   splatValues) → prepare-indirect (writes workgroup counts into
     *   the device's indirect-dispatch buffer for the sort and
     *   find-boundaries) → radix sortIndirect (key+value: tileKeys
     *   sorted, splatValues reordered) → init tile-offsets to sentinel
     *   → find-boundaries (atomicMin) → rasterize.
     *
     * All eight dispatches use distinct `Compute` instances, so their
     * persistent uniform buffers don't alias each other within a chunk;
     * a single `submit()` after the rasterize captures everything before
     * the next chunk starts overwriting `setParameter` values.
     *
     * @param chunkData - Float32Array containing `chunkSize × inputStride` floats.
     * @param chunkSize - Number of gaussians in this chunk (≤ chunkCap).
     */
    dispatchChunk(chunkData: Float32Array, chunkSize: number): void;
    /**
     * Finish processing a group. Dispatches finalize-pack and starts an
     * async readback of the group's RGBA8 pixel bytes.
     *
     * Dispatch + readback are sized to the ACTIVE group dimensions (set
     * by the most recent `beginGroup`), not the constructor-provided
     * maximum, so edge sub-frames smaller than the max don't pay for
     * unused workgroups or readback bytes.
     *
     * @returns Promise resolving to the active group's RGBA byte buffer
     * (`activeTilesX·16 × activeTilesY·16 × 4` bytes).
     */
    finishGroup(): Promise<Uint8Array>;
    /**
     * Release all GPU resources.
     */
    destroy(): void;
}
export { GpuSplatRasterizer, type SplatRasterizerOptions };
