/**
 * Embedding Quantization — ADR-130 Phase 1
 *
 * Global-scalar 8-bit quantization for 384-dimensional ONNX embeddings.
 * Compresses 384 × float32 (1536 bytes) → 384 × 1 byte (384 bytes) = 4× reduction
 * of the vector payload (≈3.84× once the 16-byte header below is included).
 * The blob is base64-encoded for storage in graph_edges.embedding_ref.
 *
 * NOTE(review): earlier wording called this "int8" quantization, while the
 * storage layout below describes unsigned bytes in [0, 255]. The layout is
 * taken as authoritative here — confirm against the implementation.
 *
 * Uses a single global min/max (not per-dimension) so each blob is compact and
 * self-contained. Per-dimension scale factors would cost 384 × 8 = 3072 bytes
 * of overhead per edge, blowing the ≤500KB/1000-edges storage target. The
 * global scalars cost only 8 bytes (float32 min + float32 max).
 *
 * Storage format (binary, little-endian):
 *   [4 bytes] magic = 0x50_51_47_56 ("PQ_G" — global scalar)
 *   [4 bytes] dimensions (uint32)
 *   [4 bytes] global min (float32)
 *   [4 bytes] global max (float32)
 *   [dim × 1] quantized uint8 values mapped from [min, max] to [0, 255]
 *
 * NOTE(review): the bytes 0x50 0x51 0x47 0x56 are ASCII 'P','Q','G','V', which
 * does not match the "PQ_G" label above — confirm which one the
 * implementation actually writes and checks.
 *
 * Sizes for a 384-dim embedding:
 *   - Raw blob:  4 + 4 + 4 + 4 + 384 = 400 bytes.
 *   - Base64:    ceil(400/3) × 4 = 536 chars; with the "inline:" prefix, 543 chars.
 *
 * Per 1000 edges: 400 KB of raw quantized payload, ≈536 KB once base64-encoded
 * (≈543 KB including the "inline:" prefix).
 *
 * Note: the 500KB/1000-edges limit in ADR-130 refers to the quantized payload
 * (not including SQL row overhead). Counted in raw bytes the scheme is within
 * budget (400 KB < 500 KB); the base64 text form (≈536 KB) is larger than
 * 500 KB, so the budget holds only when measured on raw bytes.
 *
 * For the inline embedding_ref format the base64 blob is prefixed with "inline:".
 *
 * @module v3/cli/memory/embedding-quantization
 */

/**
 * Encode a float32 embedding as a base64 string holding the global-scalar
 * quantized blob described in the module header.
 *
 * Uses global min/max quantization (≈4× compression; 400 raw bytes for a
 * 384-dim embedding).
 *
 * NOTE(review): the documentation targets 384-dim embeddings; whether other
 * lengths are accepted or rejected is not visible from this declaration.
 *
 * @param embedding - The vector to encode, either a plain number[] (from
 *   generateEmbedding) or a Float32Array.
 * @returns A string of the form "inline:<base64>" suitable for
 *   graph_edges.embedding_ref.
 */
export declare function encodeEmbedding(embedding: number[] | Float32Array): string;

/**
 * Decode an "inline:<base64>" embedding_ref back to a float32 array.
 *
 * Because each stored value was quantized to one of 256 levels, the decoded
 * values are approximations of the originally encoded vector, not exact copies.
 *
 * @param embeddingRef - The embedding_ref string to decode.
 * @returns The reconstructed vector, or null if the blob is malformed or uses
 *   an unrecognized format.
 */
export declare function decodeEmbedding(embeddingRef: string): Float32Array | null;

/**
 * Compute the raw byte cost (before base64) of a quantized embedding blob.
 * Useful for storage footprint assertions in tests.
 *
 * Per the storage format in the module header this would be a 16-byte header
 * plus one byte per dimension (400 for dims = 384) — presumably what is
 * returned; verify against the implementation.
 *
 * @param dims - Number of dimensions in the embedding.
 * @returns Size of the binary blob in bytes, excluding base64 expansion and
 *   the "inline:" prefix.
 */
export declare function encodedByteSize(dims: number): number;

/**
 * Cosine similarity between two inline-encoded embeddings.
 * Decodes both, then computes dot / (|a| × |b|).
 *
 * Caveat: 0 is returned when either ref is invalid, and 0 is also a
 * legitimate similarity value, so a 0 result alone does not tell callers
 * whether the inputs were valid.
 *
 * NOTE(review): behavior for zero-magnitude vectors or mismatched dimensions
 * is not specified by this declaration.
 *
 * @param refA - First "inline:<base64>" embedding_ref.
 * @param refB - Second "inline:<base64>" embedding_ref.
 * @returns The cosine similarity of the decoded vectors, or 0 if either ref
 *   is invalid.
 */
export declare function inlineCosine(refA: string, refB: string): number;

/**
 * Storage tier that an embedding_ref value can resolve to.
 *
 * NOTE(review): 'inline' presumably corresponds to refs carrying the
 * "inline:" prefix and 'none' to a missing ref; the ref syntax for
 * 'vector_indexes' and 'rvf' is not documented in this file.
 */
export type EmbeddingRefTier = 'inline' | 'vector_indexes' | 'rvf' | 'none';

/**
 * Determine the storage tier from an embedding_ref value.
 *
 * @param embeddingRef - The embedding_ref to classify; null and undefined are
 *   accepted by the signature.
 * @returns The tier the ref belongs to.
 */
export declare function getEmbeddingRefTier(embeddingRef: string | null | undefined): EmbeddingRefTier;
//# sourceMappingURL=embedding-quantization.d.ts.map