/**
 * Q-Learning Router for Task Routing
 *
 * Uses reinforcement learning to optimize task routing decisions
 * based on historical performance and context.
 *
 * Features:
 * - Caching for repeated task patterns (LRU cache)
 * - Optimized state space with feature hashing
 * - Epsilon decay with exponential annealing
 * - Experience replay buffer for stable learning
 * - Model persistence to .swarm/q-learning-model.json
 *
 * @module q-learning-router
 */
/**
 * Q-Learning Router Configuration
 */
export interface QLearningRouterConfig {
    /** Learning rate (default: 0.1) */
    learningRate: number;
    /** Discount factor (default: 0.99) */
    gamma: number;
    /** Initial exploration rate (default: 1.0) */
    explorationInitial: number;
    /** Final exploration rate (default: 0.01) */
    explorationFinal: number;
    /** Exploration decay steps (default: 10000) */
    explorationDecay: number;
    /** Exploration decay type (default: 'exponential') */
    explorationDecayType: 'linear' | 'exponential' | 'cosine';
    /** Maximum states in Q-table (default: 10000) */
    maxStates: number;
    /** Number of actions/routes (default: 8) */
    numActions: number;
    /** Experience replay buffer size (default: 1000) */
    replayBufferSize: number;
    /** Mini-batch size for replay (default: 32) */
    replayBatchSize: number;
    /** Enable experience replay (default: true) */
    enableReplay: boolean;
    /** Route cache size (default: 256) */
    cacheSize: number;
    /** Cache TTL in milliseconds (default: 300000 = 5 minutes) */
    cacheTTL: number;
    /** Model persistence path (default: '.swarm/q-learning-model.json') */
    modelPath: string;
    /** Auto-save interval in updates (default: 100) */
    autoSaveInterval: number;
    /** State space dimensionality for feature hashing (default: 64) */
    stateSpaceDim: number;
}
/**
 * Route decision result
 */
export interface RouteDecision {
    /** Selected route/action */
    route: string;
    /** Confidence score (0-1) */
    confidence: number;
    /** Q-values for all routes */
    qValues: number[];
    /** Was exploration used */
    explored: boolean;
    /** Route alternatives */
    alternatives: Array<{
        route: string;
        score: number;
    }>;
}
/**
 * Q-Learning Router for intelligent task routing
 *
 * Optimized with:
 * - LRU cache for repeated task patterns
 * - Feature hashing for efficient state space
 * - Exponential epsilon decay
 * - Prioritized experience replay
 * - Model persistence
 *
 * NOTE(review): the generic type arguments on `Partial`, `Promise` and
 * `Record` in this declaration were missing (bare generics do not type-check)
 * and have been restored. `Partial<QLearningRouterConfig>` follows from the
 * config interface declared in this file; the `Promise`/`Record` arguments
 * should be confirmed against the implementation in q-learning-router.ts.
 */
export declare class QLearningRouter {
    private config;
    private qTable;
    private epsilon;
    private stepCount;
    private updateCount;
    private avgTDError;
    private ruvectorEngine;
    private useNative;
    private replayBuffer;
    private replayBufferIdx;
    private totalExperiences;
    private routeCache;
    private cacheOrder;
    private cacheHits;
    private cacheMisses;
    private featureHashCache;
    /**
     * @param config Optional overrides; any subset of
     * {@link QLearningRouterConfig} may be supplied.
     */
    constructor(config?: Partial<QLearningRouterConfig>);
    /**
     * Initialize the router, attempting to load ruvector native module
     * and restore persisted model if available
     */
    initialize(): Promise<void>;
    /**
     * Load model from persistence file
     *
     * @param path Optional model file path.
     * @returns NOTE(review): presumably whether a model was loaded — confirm
     * against the implementation.
     */
    loadModel(path?: string): Promise<boolean>;
    /**
     * Save model to persistence file
     *
     * @param path Optional model file path.
     * @returns NOTE(review): presumably whether the model was written —
     * confirm against the implementation.
     */
    saveModel(path?: string): Promise<boolean>;
    /**
     * Route a task based on its context
     * Uses LRU cache for repeated task patterns
     */
    route(taskContext: string, explore?: boolean): RouteDecision;
    /**
     * Get cached route decision (LRU cache)
     */
    private getCachedRoute;
    /**
     * Cache a route decision (LRU eviction)
     */
    private cacheRoute;
    /**
     * Invalidate cache (call after significant Q-table updates)
     */
    invalidateCache(): void;
    /**
     * Invalidate a single state's cached route (called after its Q-values
     * change so the next route() reflects the update immediately).
     */
    private invalidateCacheEntry;
    /**
     * Update Q-values based on feedback
     * Includes experience replay for stable learning
     */
    update(taskContext: string, action: string, reward: number, nextContext?: string): number;
    /**
     * Internal Q-value update
     */
    private updateQValue;
    /**
     * Add experience to circular replay buffer
     */
    private addToReplayBuffer;
    /**
     * Perform prioritized experience replay
     * Samples mini-batch from buffer and updates Q-values
     */
    private experienceReplay;
    /**
     * Sample a prioritized batch from replay buffer
     * Uses proportional prioritization
     */
    private samplePrioritizedBatch;
    /**
     * Calculate epsilon using configured decay strategy
     */
    private calculateEpsilon;
    /**
     * Get statistics including cache and replay buffer metrics
     *
     * NOTE(review): value type reconstructed as `number` — confirm.
     */
    getStats(): Record<string, number>;
    /**
     * Reset the router (clears all learned data)
     */
    reset(): void;
    /**
     * Export Q-table for persistence
     *
     * NOTE(review): entry shape reconstructed — confirm against the
     * implementation. It is kept identical to the parameter of `import()`
     * so an exported table can be passed straight back in.
     */
    export(): Record<string, {
        qValues: number[];
        visits: number;
    }>;
    /**
     * Import Q-table from persistence
     *
     * @param data A table in the shape returned by `export()`.
     */
    import(data: Record<string, {
        qValues: number[];
        visits: number;
    }>): void;
    /**
     * Legacy hash function (kept for backward compatibility)
     */
    private hashState;
    /**
     * Public state-key encoder (#2239). This is exactly what `route()` uses
     * to look up Q-values, so testing it directly is testing the binding the
     * Q-learner actually sees.
     */
    getStateKey(context: string): string;
    /**
     * Optimized state hashing using feature extraction
     * Creates a more semantic representation of the task context
     */
    private hashStateOptimized;
    /**
     * Extract feature vector from task context
     * Uses keyword matching and n-gram hashing
     */
    private extractFeatures;
    /**
     * Convert feature vector to state key
     * Uses locality-sensitive hashing for similar contexts.
     *
     * #2239 — the previous fold was `((hash << 4) ^ q[i]) & 0x7fffffff` over 16
     * 4-bit groups: each group i landed at bit 4·(15−i), so groups 0–7 (= the
     * entire keyword block, features 0–31) shifted past the 31-bit mask and were
     * discarded. Keyword-distinct tasks collapsed to one Q-state. We now use a
     * 32-bit FNV-1a hash, which mixes every quantized group losslessly into the
     * state key. Encoder version bumped to 2; see PersistedModel.encoderVersion.
     */
    private featureVectorToKey;
    /**
     * MurmurHash3 32-bit implementation for n-gram hashing
     */
    private murmurhash3;
    private getQValues;
    private getOrCreateEntry;
    private argmax;
    private softmaxConfidence;
    private pruneQTable;
}
/**
 * Factory function
 *
 * @param config Optional overrides; any subset of
 * {@link QLearningRouterConfig} may be supplied.
 */
export declare function createQLearningRouter(config?: Partial<QLearningRouterConfig>): QLearningRouter;
//# sourceMappingURL=q-learning-router.d.ts.map