/**
 * Q-Learning Router for Task Routing
 *
 * Uses reinforcement learning to optimize task routing decisions
 * based on historical performance and context.
 *
 * Features:
 * - Caching for repeated task patterns (LRU cache)
 * - Optimized state space with feature hashing
 * - Epsilon decay with linear, exponential (default), or cosine annealing
 * - Experience replay buffer for stable learning
 * - Model persistence to .swarm/q-learning-model.json
 *
 * @module q-learning-router
 */
/**
 * Configuration for the Q-learning router.
 *
 * Every field is required on this interface, but the `QLearningRouter`
 * constructor and `createQLearningRouter` both accept a
 * `Partial<QLearningRouterConfig>`. The defaults quoted on each field are the
 * documented ones; they are applied by the implementation, which is not part
 * of this declaration file.
 */
export interface QLearningRouterConfig {
    /** Learning rate (default: 0.1) */
    learningRate: number;
    /** Discount factor (default: 0.99) */
    gamma: number;
    /** Initial exploration rate (default: 1.0) */
    explorationInitial: number;
    /** Final exploration rate (default: 0.01) */
    explorationFinal: number;
    /** Number of exploration decay steps (default: 10000) */
    explorationDecay: number;
    /** Shape of the exploration decay schedule: linear, exponential, or cosine (default: 'exponential') */
    explorationDecayType: 'linear' | 'exponential' | 'cosine';
    /** Maximum number of states in the Q-table (default: 10000) */
    maxStates: number;
    /** Number of actions, i.e. routes (default: 8) */
    numActions: number;
    /** Experience replay buffer size (default: 1000) */
    replayBufferSize: number;
    /** Mini-batch size for replay (default: 32) */
    replayBatchSize: number;
    /** Whether experience replay is enabled (default: true) */
    enableReplay: boolean;
    /** Route cache size (default: 256) */
    cacheSize: number;
    /** Cache time-to-live in milliseconds (default: 300000, i.e. 5 minutes) */
    cacheTTL: number;
    /** Path of the model persistence file (default: '.swarm/q-learning-model.json') */
    modelPath: string;
    /** Auto-save interval, counted in updates (default: 100) */
    autoSaveInterval: number;
    /** State space dimensionality for feature hashing (default: 64) */
    stateSpaceDim: number;
}
/**
 * Result of a routing call: the selected route together with the data that
 * accompanied the selection.
 */
export interface RouteDecision {
    /** The route (action) that was selected. */
    route: string;
    /** Confidence score for the selection, between 0 and 1. */
    confidence: number;
    /** Q-values for all routes. */
    qValues: number[];
    /** True when exploration was used to make the selection. */
    explored: boolean;
    /** Alternative routes, each paired with its score. */
    alternatives: {
        route: string;
        score: number;
    }[];
}
/**
 * Q-Learning Router for intelligent task routing
 *
 * Optimized with:
 * - LRU cache for repeated task patterns
 * - Feature hashing for efficient state space
 * - Configurable epsilon decay (linear, exponential, or cosine; see
 *   `QLearningRouterConfig.explorationDecayType`)
 * - Prioritized experience replay
 * - Model persistence
 *
 * This is a declaration only: method bodies and the types of private members
 * are not visible here, so notes marked "NOTE(review)" below are assumptions
 * to confirm against the implementation.
 */
export declare class QLearningRouter {
    // Private state. Declaration output omits the types of private members,
    // so only their names are available in this file.
    private config;
    private qTable;
    private epsilon;
    private stepCount;
    private updateCount;
    private avgTDError;
    // NOTE(review): presumably the ruvector native module handle and a flag for
    // whether it is in use (see initialize()) — confirm.
    private ruvectorEngine;
    private useNative;
    // Experience replay bookkeeping.
    private replayBuffer;
    private replayBufferIdx;
    private totalExperiences;
    // Route cache bookkeeping.
    private routeCache;
    private cacheOrder;
    private cacheHits;
    private cacheMisses;
    private featureHashCache;
    /**
     * @param config - Optional partial configuration.
     *   NOTE(review): omitted fields presumably fall back to the defaults
     *   documented on `QLearningRouterConfig` — confirm.
     */
    constructor(config?: Partial<QLearningRouterConfig>);
    /**
     * Initialize the router, attempting to load ruvector native module
     * and restore persisted model if available
     *
     * @returns A promise that resolves once initialization has finished.
     */
    initialize(): Promise<void>;
    /**
     * Load model from persistence file
     *
     * @param path - Optional file path.
     *   NOTE(review): presumably defaults to `config.modelPath` — confirm.
     * @returns A promise of a boolean.
     *   NOTE(review): presumably `true` when a model was loaded — confirm.
     */
    loadModel(path?: string): Promise<boolean>;
    /**
     * Save model to persistence file
     *
     * @param path - Optional file path.
     *   NOTE(review): presumably defaults to `config.modelPath` — confirm.
     * @returns A promise of a boolean.
     *   NOTE(review): presumably `true` when the model was written — confirm.
     */
    saveModel(path?: string): Promise<boolean>;
    /**
     * Route a task based on its context
     * Uses LRU cache for repeated task patterns
     *
     * @param taskContext - Text describing the task to route.
     * @param explore - Optional exploration switch.
     *   NOTE(review): its default and exact effect are not visible in this
     *   declaration — confirm.
     * @returns The routing decision for the task.
     */
    route(taskContext: string, explore?: boolean): RouteDecision;
    /**
     * Get cached route decision (LRU cache)
     */
    private getCachedRoute;
    /**
     * Cache a route decision (LRU eviction)
     */
    private cacheRoute;
    /**
     * Invalidate cache (call after significant Q-table updates)
     */
    invalidateCache(): void;
    /**
     * Invalidate a single state's cached route (called after its Q-values
     * change so the next route() reflects the update immediately).
     */
    private invalidateCacheEntry;
    /**
     * Update Q-values based on feedback
     * Includes experience replay for stable learning
     *
     * @param taskContext - Context of the task the feedback is for.
     * @param action - The route (action) that was taken.
     * @param reward - Reward observed for that action.
     * @param nextContext - Optional context of the following state.
     * @returns A number.
     *   NOTE(review): presumably the TD error of the update — confirm.
     */
    update(taskContext: string, action: string, reward: number, nextContext?: string): number;
    /**
     * Internal Q-value update
     */
    private updateQValue;
    /**
     * Add experience to circular replay buffer
     */
    private addToReplayBuffer;
    /**
     * Perform prioritized experience replay
     * Samples mini-batch from buffer and updates Q-values
     */
    private experienceReplay;
    /**
     * Sample a prioritized batch from replay buffer
     * Uses proportional prioritization
     */
    private samplePrioritizedBatch;
    /**
     * Calculate epsilon using configured decay strategy
     */
    private calculateEpsilon;
    /**
     * Get statistics including cache and replay buffer metrics
     *
     * @returns A map from statistic name to numeric value.
     */
    getStats(): Record<string, number>;
    /**
     * Reset the router (clears all learned data)
     */
    reset(): void;
    /**
     * Export Q-table for persistence
     *
     * @returns A record keyed by string, each entry holding `qValues` and a
     *   `visits` count.
     *   NOTE(review): keys are presumably state keys — confirm.
     */
    export(): Record<string, {
        qValues: number[];
        visits: number;
    }>;
    /**
     * Import Q-table from persistence
     *
     * @param data - Record in the same shape that `export()` returns.
     */
    import(data: Record<string, {
        qValues: number[];
        visits: number;
    }>): void;
    /**
     * Legacy hash function (kept for backward compatibility)
     */
    private hashState;
    /**
     * Public state-key encoder (#2239). This is exactly what `route()` uses
     * to look up Q-values, so testing it directly is testing the binding the
     * Q-learner actually sees.
     *
     * @param context - Task context to encode.
     * @returns The state key for that context.
     */
    getStateKey(context: string): string;
    /**
     * Optimized state hashing using feature extraction
     * Creates a more semantic representation of the task context
     */
    private hashStateOptimized;
    /**
     * Extract feature vector from task context
     * Uses keyword matching and n-gram hashing
     */
    private extractFeatures;
    /**
     * Convert feature vector to state key
     * Uses locality-sensitive hashing for similar contexts.
     *
     * #2239 — the previous fold was `((hash << 4) ^ q[i]) & 0x7fffffff` over 16
     * 4-bit groups: each group i landed at bit 4·(15−i), so groups 0–7 (= the
     * entire keyword block, features 0–31) shifted past the 31-bit mask and were
     * discarded. Keyword-distinct tasks collapsed to one Q-state. We now use a
     * 32-bit FNV-1a hash, which mixes every quantized group losslessly into the
     * state key. Encoder version bumped to 2; see PersistedModel.encoderVersion.
     */
    private featureVectorToKey;
    /**
     * MurmurHash3 32-bit implementation for n-gram hashing
     */
    private murmurhash3;
    // Private helpers; their signatures are elided in declaration output.
    private getQValues;
    private getOrCreateEntry;
    private argmax;
    private softmaxConfidence;
    private pruneQTable;
}
/**
 * Factory function returning a `QLearningRouter`.
 *
 * @param config - Optional partial configuration, the same shape the
 *   `QLearningRouter` constructor accepts.
 * @returns A `QLearningRouter` instance.
 *   NOTE(review): whether the returned router has already been initialized
 *   cannot be determined from this declaration — confirm before relying on it.
 */
export declare function createQLearningRouter(config?: Partial<QLearningRouterConfig>): QLearningRouter;
//# sourceMappingURL=q-learning-router.d.ts.map