/**
 * SearchEngine - Unified search engine supporting BM25, vector, and hybrid search.
 *
 * Provides search capabilities for Workspace, enabling keyword-based (BM25),
 * semantic (vector), and combined hybrid search across indexed content.
 */
import type { MastraVector } from '../../vector/index.js';
import type { LineRange } from '../line-utils.js';
import { BM25Index } from './bm25.js';
import type { BM25Config, TokenizeOptions } from './bm25.js';
/**
 * Search mode options.
 *
 * - `'vector'` - semantic (vector) search
 * - `'bm25'` - keyword-based (BM25) search
 * - `'hybrid'` - combined BM25 + vector search
 */
export type SearchMode = 'vector' | 'bm25' | 'hybrid';
/**
 * Single-text embedder - takes one text and returns its embedding.
 *
 * This is the legacy embedder shape and remains the default. Each document is
 * embedded with a separate call.
 */
export interface SingleEmbedder {
    /**
     * @param text - The text to embed.
     * @returns A promise resolving to the embedding vector for `text`.
     */
    (text: string): Promise<number[]>;
}
/**
 * Batch-capable embedder - takes an array of texts and returns their embeddings
 * in the same order.
 *
 * Branded with `batch: true` so {@link SearchEngine} can detect batch support at
 * runtime and dispatch to a single batched embedder call instead of one call
 * per document. This dramatically speeds up large index rebuilds against
 * providers that support batch embedding (e.g. OpenAI's `embedMany`).
 *
 * @example
 * ```ts
 * import { embedMany } from 'ai';
 * import { openai } from '@ai-sdk/openai';
 *
 * const model = openai.embedding('text-embedding-3-small');
 * const embedder: BatchEmbedder = Object.assign(
 *   async (texts: string[]) => {
 *     const { embeddings } = await embedMany({ model, values: texts });
 *     return embeddings;
 *   },
 *   { batch: true as const, maxBatchSize: 2048 },
 * );
 * ```
 */
export interface BatchEmbedder {
    /**
     * @param texts - The texts to embed.
     * @returns A promise resolving to one embedding per input text, in the
     * same order as `texts`.
     */
    (texts: string[]): Promise<number[][]>;
    /** Brand that marks this embedder as batch-capable. */
    readonly batch: true;
    /**
     * Maximum number of texts the underlying provider accepts per call. When
     * unset, all pending texts are sent in a single request.
     */
    readonly maxBatchSize?: number;
}
/**
 * Embedder interface - either a legacy single-text embedder
 * ({@link SingleEmbedder}) or a batch-capable embedder branded with
 * `batch: true` ({@link BatchEmbedder}).
 *
 * Use {@link isBatchEmbedder} to narrow a value of this type to the batch
 * variant.
 */
export type Embedder = SingleEmbedder | BatchEmbedder;
/**
 * Type guard: returns true when the embedder is the batch-capable variant.
 *
 * NOTE(review): presumably implemented by checking the `batch: true` brand on
 * the embedder; the implementation is not visible in this declaration file.
 *
 * @param embedder - The embedder to inspect.
 * @returns `true` (narrowing `embedder` to {@link BatchEmbedder}) when the
 * embedder is batch-capable, `false` otherwise.
 */
export declare function isBatchEmbedder(embedder: Embedder): embedder is BatchEmbedder;
/**
 * Configuration for vector search.
 *
 * All three fields are required.
 */
export interface VectorConfig {
    /** Vector store for semantic search */
    vectorStore: MastraVector;
    /**
     * Embedder function for generating vectors. May be either a single-text
     * or a batch-capable embedder (see {@link Embedder}).
     */
    embedder: Embedder;
    /** Index name for the vector store */
    indexName: string;
}
/**
 * Configuration for BM25 search.
 *
 * Both fields are optional.
 */
export interface BM25SearchConfig {
    /** BM25 algorithm parameters */
    bm25?: BM25Config;
    /** Tokenization options */
    tokenize?: TokenizeOptions;
}
/**
 * A document to be indexed.
 */
export interface IndexDocument {
    /** Unique identifier for this document */
    id: string;
    /** Text content to index */
    content: string;
    /** Optional metadata to store with the document */
    metadata?: Record<string, unknown>;
    /**
     * For chunked documents: the starting line number (1-indexed) of this
     * chunk in the original document.
     *
     * When provided, `lineRange` in search results will be adjusted to reflect
     * original document lines.
     */
    startLineOffset?: number;
}
/**
 * Base search result with common fields.
 */
export interface SearchResult {
    /** Document identifier */
    id: string;
    /** Document content */
    content: string;
    /** Search score (0-1 for normalized results) */
    score: number;
    /** Line range where query terms appear */
    lineRange?: LineRange;
    /** Optional metadata */
    metadata?: Record<string, unknown>;
    /** Score breakdown by search type; each entry is optional. */
    scoreDetails?: {
        /** Score attributed to vector search, when present. */
        vector?: number;
        /** Score attributed to BM25 search, when present. */
        bm25?: number;
    };
}
/**
 * Options for searching.
 *
 * All fields are optional.
 */
export interface SearchOptions {
    /** Maximum number of results to return */
    topK?: number;
    /** Minimum score threshold */
    minScore?: number;
    /** Search mode: 'bm25', 'vector', or 'hybrid' */
    mode?: SearchMode;
    /**
     * Weight for vector scores in hybrid search (0-1).
     * @default 0.5
     */
    vectorWeight?: number;
    /** Filter for vector search */
    filter?: Record<string, unknown>;
}
/**
 * Options for batch indexing (see `SearchEngine.indexMany`).
 */
export interface IndexManyOptions {
    /**
     * Maximum number of documents to index concurrently (embedder + vector upsert).
     * Must be a safe integer ≥ 1 (same rule as `p-map`).
     * @default 8
     */
    concurrency?: number;
    /**
     * When `true` (default), the first rejected `index` rejects the whole `indexMany` call.
     * When `false`, all documents are processed; if any failed, the promise rejects with an `AggregateError`.
     * @default true
     */
    stopOnError?: boolean;
}
/**
 * Configuration for SearchEngine.
 *
 * All fields are optional; providing `bm25` and/or `vector` enables the
 * corresponding search type.
 */
export interface SearchEngineConfig {
    /** BM25 configuration (enables BM25 search) */
    bm25?: BM25SearchConfig;
    /** Vector configuration (enables vector search) */
    vector?: VectorConfig;
    /**
     * Whether to use lazy vector indexing. `false` means eager indexing.
     * @default false
     */
    lazyVectorIndex?: boolean;
}
/**
 * Options for {@link splitIntoChunks}.
 *
 * Both fields are optional; their default values are not visible in this
 * declaration file.
 */
export interface ChunkOptions {
    /**
     * Character budget per chunk. Whole lines are accumulated into a chunk
     * until adding the next line would exceed this value.
     */
    maxChunkChars?: number;
    /** Number of lines shared between adjacent chunks. */
    overlapLines?: number;
}
/**
 * A single chunk of text, as returned by {@link splitIntoChunks}.
 */
export interface TextChunk {
    /** Text content of the chunk. */
    content: string;
    /**
     * Line number in the original text at which this chunk starts.
     * NOTE(review): presumably 1-indexed, matching
     * `IndexDocument.startLineOffset` - confirm against the implementation.
     */
    startLine: number;
}
/**
 * Split text into line-based chunks that stay within a character budget.
 *
 * Each chunk is formed by accumulating whole lines until adding the next line
 * would exceed `maxChunkChars`. Adjacent chunks share `overlapLines` lines so
 * that context around chunk boundaries is preserved for embedding quality.
 *
 * Returns the original text as a single chunk when it already fits.
 *
 * @param text - The text to split.
 * @param options - Optional chunking parameters (`maxChunkChars`,
 * `overlapLines`).
 * @returns The resulting chunks, each carrying its content and starting line.
 */
export declare function splitIntoChunks(text: string, options?: ChunkOptions): TextChunk[];
/**
 * Unified search engine supporting BM25, vector, and hybrid search.
 *
 * Used internally by Workspace to provide consistent search functionality.
 *
 * @example
 * ```typescript
 * const engine = new SearchEngine({
 *   bm25: { tokenize: { lowercase: true } },
 *   vector: { vectorStore, embedder, indexName: 'my-index' },
 * });
 *
 * // Index documents
 * await engine.index({ id: 'doc1', content: 'Hello world' });
 *
 * // Search
 * const results = await engine.search('hello', { mode: 'hybrid', topK: 5 });
 * ```
 */
export declare class SearchEngine {
    #private;
    /**
     * Create a search engine.
     *
     * @param config - Optional engine configuration. Supplying `bm25` enables
     * BM25 search and supplying `vector` enables vector search.
     */
    constructor(config?: SearchEngineConfig);
    /**
     * Index a document for search.
     *
     * @param doc - The document to index.
     */
    index(doc: IndexDocument): Promise<void>;
    /**
     * Index multiple documents (up to `concurrency` at a time when async vector work runs).
     *
     * @param docs - Documents to index
     * @param options - Batch indexing options; `concurrency` defaults to 8 and
     * `stopOnError` defaults to `true`. With `stopOnError: false`, all
     * documents are processed and any failures are reported together as an
     * `AggregateError` rejection.
     */
    indexMany(docs: IndexDocument[], options?: IndexManyOptions): Promise<void>;
    /**
     * Remove a document from the index.
     *
     * @param id - Identifier of the document to remove.
     */
    remove(id: string): Promise<void>;
    /**
     * Remove all documents whose ID starts with the given prefix.
     * Used to remove all chunks belonging to a single source document.
     *
     * @param prefix - ID prefix to match.
     */
    removeByPrefix(prefix: string): Promise<void>;
    /**
     * Remove a source document and all of its chunked variants.
     *
     * This also attempts a metadata-based bulk delete for chunk vectors so stale
     * chunk IDs from previous process runs are cleaned up in persistent stores.
     *
     * @param sourceId - Identifier of the source document.
     */
    removeSource(sourceId: string): Promise<void>;
    /**
     * Clear all indexed documents.
     *
     * Unlike the other mutating methods, this returns `void` rather than a
     * promise.
     */
    clear(): void;
    /**
     * Search for documents.
     *
     * @param query - The query text.
     * @param options - Optional search settings (mode, `topK`, `minScore`,
     * `vectorWeight`, `filter`).
     * @returns A promise resolving to the matching results.
     */
    search(query: string, options?: SearchOptions): Promise<SearchResult[]>;
    /**
     * Check if BM25 search is available
     */
    get canBM25(): boolean;
    /**
     * Check if vector search is available
     */
    get canVector(): boolean;
    /**
     * Check if hybrid search is available.
     *
     * NOTE(review): presumably true only when both BM25 and vector search are
     * available - confirm against the implementation.
     */
    get canHybrid(): boolean;
    /**
     * Get the BM25 index (for serialization/debugging).
     *
     * May be `undefined`; presumably when BM25 is not configured - confirm
     * against the implementation.
     */
    get bm25Index(): BM25Index | undefined;
}
//# sourceMappingURL=search-engine.d.ts.map