/**
 * WGSL sources for the 5 compute shaders that make up the 6-stage GPU
 * dilation pipeline (extract → clear → dilateX → dilateZ → dilateY →
 * compact; the dilateZ and dilateY stages run the same Y/Z shader with
 * different parameters), plus the small block-type + Fibonacci-hash
 * constants block that the extract and compact shaders share. The sources
 * are composed with plain TS template strings — the constants block is
 * interpolated into each consuming shader via `${dilationConstants}` rather
 * than going through the engine's `#include` preprocessor.
 *
 * The orchestrator class lives in `gpu-dilation.ts` and imports each
 * `xxxWgsl()` generator individually.
 */
/**
 * Extract shader — converts a `SparseVoxelGrid` (uploaded as types + open-
 * addressed mask hash) directly into a row-aligned dense bit buffer for one
 * outer chunk. One thread runs per source block in the chunk's outer block
 * range.
 *
 * For MIXED blocks the shader performs a Fibonacci-hash linear-probe lookup
 * against the uploaded `srcKeys`/`srcLo`/`srcHi` arrays; the hash matches
 * the CPU `BlockMaskMap.slot` formula bit-for-bit, so both sides resolve a
 * block to the same slot.
 *
 * The block's 4×4 X-row pattern lands in a single dense word per row, at bit
 * offset `(blockX*4) & 31`. Multiple blocks share the same dense word at
 * non-overlapping bit positions, so each write is an `atomicOr`.
 *
 * Precondition: because the shader only ORs bits in, the caller must clear
 * the dense buffer before dispatching it.
 *
 * @returns WGSL source for the extract compute shader.
 */
declare const extractWgsl: () => string;
/**
 * Compact shader — converts a dilated dense bit buffer back into per-block
 * `(type, lo, hi)` form for the chunk's INNER block region. One thread runs
 * per inner block: it reads the block's 16 dense-word patterns, assembles
 * them into the block's 64-bit mask, and classifies the block as
 * EMPTY/SOLID/MIXED.
 *
 * Results go to two parallel outputs:
 *   - `typesOut`: 2-bit-per-block packed (matches `dst.types` layout).
 *     Multiple threads write the same word, so the write is an `atomicOr`
 *     and the caller must clear the buffer beforehand.
 *   - `masksOut`: `[lo, hi]` pairs per inner block, indexed by inner-local
 *     block index. Always written, and non-atomically, because exactly one
 *     thread owns each slot.
 *
 * @returns WGSL source for the compact compute shader.
 */
declare const compactWgsl: () => string;
/**
 * Clear shader — writes 0 to every word in the destination buffer up to
 * `numWords`.
 *
 * It is dispatched in the same command encoder as the dilation passes so
 * that the clear is ordered with them on the GPU. Using `queue.writeBuffer`
 * for inter-pass clears would race: those writes are queued separately from
 * encoder commands, so *all writes execute first* and only then does the
 * command buffer run.
 *
 * @returns WGSL source for the clear compute shader.
 */
declare const clearWgsl: () => string;
/**
 * X-axis dilation shader — per-word.
 *
 * Each thread produces one 32-bit output word at `(xWord, y, z)` and writes
 * it directly (no atomics, since no two threads share an output word). The
 * output bit at relative X position `b` (in `[0, 31]`) is the OR of the
 * input bits in the window `[xWord*32 + b - r, xWord*32 + b + r]`.
 *
 * For each distance `d` in `[1, r]`, the shader reads the source word(s)
 * containing the bits shifted by `d`, so the radius may span any number of
 * 32-bit words.
 *
 * Reads are bounded by the chunk's `numXWords` (= ceil(nx / 32));
 * out-of-bounds neighbors are read as 0.
 *
 * @returns WGSL source for the X-axis dilation compute shader.
 */
declare const dilateXWgsl: () => string;
/**
 * Y/Z-axis dilation shader — per-word. One shader source serves both the
 * Y and Z passes.
 *
 * Each thread reads up to `2 * halfExtent + 1` input words at the same
 * `xWord` along the chosen axis (Y or Z) and ORs them into one output word.
 * No bit shifts are needed because words at the same `xWord` are bit-aligned
 * across rows (the row stride is `numXWords` words).
 *
 * The caller picks the axis by setting `stride` and `axisLen`:
 *  - Y-pass: `stride = numXWords`, `axisLen = ny`.
 *  - Z-pass: `stride = numXWords * ny`, `axisLen = nz`.
 *
 * @returns WGSL source for the Y/Z-axis dilation compute shader.
 */
declare const dilateYZWgsl: () => string;
// Public surface: one WGSL source generator per compute shader.
export { extractWgsl, compactWgsl, clearWgsl, dilateXWgsl, dilateYZWgsl };