import { GraphicsDevice } from 'playcanvas';
import type { SparseVoxelGrid } from '../voxel/sparse-voxel-grid';
/**
 * Separable 3D dilation on the GPU using a row-aligned dense bit grid
 * (1 bit per voxel, packed into u32 words; each row of bits along X starts
 * on a word boundary so per-word access is trivial). Each pass owns its
 * own `Compute` instance because their uniform buffers must not collide
 * within a single submit.
 */
declare class GpuDilation {
    // NOTE(review): private members are declared without types in this file,
    // so their shapes cannot be read from here. The per-member notes below are
    // inferred from names only and should be confirmed against the
    // implementation.

    /** Presumably the `GraphicsDevice` handed to the constructor — TODO confirm. */
    private device;
    /**
     * One shader per pass kind. By name: X dilation, Y/Z dilation, clear,
     * extract and compact — TODO confirm against the implementation.
     */
    private dilateXShader;
    private dilateYZShader;
    private clearShader;
    private extractShader;
    private compactShader;
    /** Bind-group formats; by name, one for each shader above — TODO confirm. */
    private dilateXBindGroupFormat;
    private dilateYZBindGroupFormat;
    private clearBindGroupFormat;
    private extractBindGroupFormat;
    private compactBindGroupFormat;
    /** Per-slot state; presumably `NUM_SLOTS` entries — TODO confirm. */
    private slots;
    /**
     * State for the uploaded source grid. Presumably populated by `uploadSrc`
     * and freed by `releaseSrc`; the `Lo`/`Hi` buffers look like the two
     * halves of the per-block masks — TODO confirm.
     */
    private srcTypesBuffer;
    private srcKeysBuffer;
    private srcLoBuffer;
    private srcHiBuffer;
    private srcTypesCapacity;
    private srcMasksCapacity;
    private srcMeta;
    /** Number of double-buffered dispatch slots. */
    static readonly NUM_SLOTS = 2;
    /**
     * Create a dilation helper for a graphics device.
     * @param device - Graphics device; presumably used for every shader,
     * buffer and dispatch owned by this instance — TODO confirm.
     */
    constructor(device: GraphicsDevice);
    /**
     * NOTE(review): signature not visible in this declaration. By name,
     * presumably (re)allocates a slot's working buffers when they are too
     * small — confirm against the implementation.
     */
    private ensureSlotBuffers;
    /**
     * Dispatch a compute clear of `dst` to zero for the first `numWords` words.
     * Uses the command encoder so it's correctly ordered with subsequent
     * dilation passes (unlike `queue.writeBuffer`, which is queued separately
     * and would race against the dispatches).
     * @param slot - Per-chunk slot whose `clearCompute` pipeline is dispatched.
     * @param dst - Destination buffer to zero.
     * @param numWords - Number of leading u32 words to clear.
     */
    private dispatchClear;
    /**
     * Upload a `SparseVoxelGrid` to GPU storage buffers used by the extract
     * shader. Reuses the existing buffers if they're large enough; otherwise
     * destroys and reallocates. Designed to be called once per
     * `gpuDilate3` call (the same `src` is read across all chunks).
     * @param src - Source sparse grid to upload.
     */
    uploadSrc(src: SparseVoxelGrid): void;
    /** Free the uploaded `src` buffers. Callers may invoke this once `gpuDilate3` has finished. */
    releaseSrc(): void;
    /**
     * NOTE(review): signature not visible in this declaration. By name,
     * presumably (re)allocates a slot's output/readback buffers for the
     * sparse path — confirm against the implementation.
     */
    private ensureSlotOutputBuffers;
    /**
     * Sparse-path submit. Reads from the previously-uploaded `src` (via
     * `uploadSrc`), runs extract → dilate → compact as GPU passes, and returns
     * Promises for the per-block `typesOut` (packed 2-bit) and `masksOut`
     * (lo/hi pairs). Caller integrates these into `dst` directly.
     *
     * The result is returned synchronously as an object holding two
     * independent promises (`types` and `masks`), each resolving to a
     * `Uint32Array`.
     * @param slotIdx - Round-robin slot index (`0..NUM_SLOTS-1`).
     * @param minBx - Outer chunk origin block X (in `src`'s block coords).
     * @param minBy - Outer chunk origin block Y.
     * @param minBz - Outer chunk origin block Z.
     * @param outerBx - Outer chunk size in blocks along X.
     * @param outerBy - Outer chunk size in blocks along Y.
     * @param outerBz - Outer chunk size in blocks along Z.
     * @param haloBx - Halo size in blocks along X (one side).
     * @param haloBy - Halo size in blocks along Y (one side).
     * @param haloBz - Halo size in blocks along Z (one side).
     * @param innerBx - Inner (output) region size in blocks along X.
     * @param innerBy - Inner region size in blocks along Y.
     * @param innerBz - Inner region size in blocks along Z.
     * @param halfExtentXZ - Dilation half-extent in voxels along X and Z.
     * @param halfExtentY - Dilation half-extent in voxels along Y.
     * @returns Promises for the inner region's packed types and `[lo, hi]` masks.
     */
    submitChunkSparse(slotIdx: number, minBx: number, minBy: number, minBz: number, outerBx: number, outerBy: number, outerBz: number, haloBx: number, haloBy: number, haloBz: number, innerBx: number, innerBy: number, innerBz: number, halfExtentXZ: number, halfExtentY: number): {
        types: Promise<Uint32Array>;
        masks: Promise<Uint32Array>;
    };
    /**
     * Per-pass dispatch helpers. Signatures are not visible in this
     * declaration; by name they presumably record the extract, compact,
     * X-dilation and Y/Z-dilation passes respectively — TODO confirm.
     */
    private dispatchExtract;
    private dispatchCompact;
    private dispatchX;
    private dispatchYZ;
    /**
     * NOTE(review): presumably releases the GPU resources owned by this
     * instance; confirm whether it also frees the uploaded `src` buffers or
     * whether `releaseSrc` must be called separately.
     */
    destroy(): void;
}
export { GpuDilation };
