/**
 * NLP Scoring Manager - Jaccard similarity and Shannon entropy for semantic analysis
 *
 * Implements intelligent document similarity scoring using:
 * - Jaccard similarity for vocabulary overlap
 * - Shannon entropy for information density
 * - Combined scoring for meaningful semantic relationships
 *
 * Key insights from analysis:
 * - High Jaccard (>60%) + Moderate entropy (4.5-6.0) = Same technical domain
 * - High Jaccard + Low entropy (<3.0) = Stop word pollution, superficial
 * - Low Jaccard + Similar entropy = Different domains, equally complex
 *
 * Part of Enhanced Capability Index (#1085)
 */
import { IndexConfigManager } from './config/IndexConfig.js';
/**
 * Scoring result with detailed metrics.
 *
 * This is the return type of `NLPScoringManager.scoreRelevance` and the value
 * type used by `buildSimilarityMatrix` and `findSimilar`.
 */
export interface ScoringResult {
    /** Jaccard similarity; `calculateJaccard` documents its range as 0 (no overlap) to 1 (identical). */
    jaccard: number;
    /**
     * Shannon entropy value.
     * NOTE(review): which text (first, second, or both combined) this is computed
     * over is not visible in this declaration — confirm against the implementation.
     */
    entropy: number;
    /** Combined Jaccard/entropy score. NOTE(review): formula and range are not visible here — confirm. */
    combinedScore: number;
    /** Textual interpretation of the score; presumably human-readable — TODO confirm exact values. */
    interpretation: string;
    /** Token count. NOTE(review): presumably counted after cleaning/tokenizing; which text(s) it covers is not visible here. */
    tokenCount: number;
    /** Overlap count. NOTE(review): presumably the number of tokens shared by both texts (|A ∩ B|) — confirm. */
    overlapCount: number;
}
/**
 * Pairwise similarity between two elements.
 *
 * Couples two element identifiers with the `ScoringResult` computed for them.
 */
export interface PairwiseSimilarity {
    /** First element of the pair. NOTE(review): presumably an element name/identifier rather than its text — confirm. */
    element1: string;
    /** Second element of the pair. NOTE(review): presumably an element name/identifier rather than its text — confirm. */
    element2: string;
    /** Detailed scoring metrics for the pair. */
    similarity: ScoringResult;
    /** Timestamp as a string. NOTE(review): format (e.g. ISO-8601) and meaning are not visible here — confirm. */
    timestamp: string;
}
/**
 * Configuration for scoring algorithm.
 *
 * The `NLPScoringManager` constructor accepts a `Partial<ScoringConfig>`, so
 * callers may supply any subset of these fields.
 */
export interface ScoringConfig {
    /** Minimum token length. NOTE(review): presumably tokens shorter than this are dropped during tokenization — confirm. */
    minTokenLength: number;
    /** Cache expiry. NOTE(review): unit (ms vs. s) is not visible in this declaration — confirm. */
    cacheExpiry: number;
    /** Maximum cache size. NOTE(review): presumably an entry count that triggers LRU eviction — confirm. */
    maxCacheSize: number;
    /**
     * Entropy band boundaries used when interpreting a score.
     * NOTE(review): the file header mentions "<3.0" as low and "4.5-6.0" as
     * moderate entropy; whether these fields are lower or upper bounds is not
     * visible here — confirm.
     */
    entropyBands: {
        low: number;
        moderate: number;
        high: number;
    };
    /**
     * Jaccard similarity thresholds used when interpreting a score.
     * NOTE(review): presumably on the same 0..1 scale returned by
     * `calculateJaccard` (the file header cites ">60%" as high) — confirm.
     */
    jaccardThresholds: {
        low: number;
        moderate: number;
        high: number;
    };
}
/**
 * Scores textual similarity using Jaccard similarity and Shannon entropy.
 *
 * Exposes single-pair scoring (`calculateJaccard`, `calculateEntropy`,
 * `scoreRelevance`), multi-document helpers (`buildSimilarityMatrix`,
 * `findSimilar`), key-term extraction, and cache management.
 *
 * NOTE(review): this is a declaration file (see the `sourceMappingURL`
 * trailer), so private members appear without types and no implementation is
 * visible. Documentation changes should presumably be mirrored in the `.ts`
 * source this file is generated from.
 */
export declare class NLPScoringManager {
    /** Result cache (type elided in the declaration). See `addToCache`, `clearCache`, `getCacheStats`. */
    private cache;
    /** Bookkeeping for LRU tracking (type elided). See `updateAccessOrder`. */
    private cacheAccessOrder;
    /** Active configuration (type elided); presumably a resolved `ScoringConfig` — TODO confirm. */
    private config;
    /** Unicode validation helper (type elided). NOTE(review): how it is used is not visible here. */
    private unicodeValidator;
    /** Optional handle (type elided); presumably a timer driving `cleanExpiredCache` — TODO confirm. */
    private cleanupInterval?;
    /**
     * Create a scoring manager.
     *
     * @param config - Optional partial scoring configuration. NOTE(review): how
     *   omitted fields are defaulted is not visible in this declaration.
     * @param indexConfigManager - Optional `IndexConfigManager` instance.
     *   NOTE(review): presumably a source of configuration values; precedence
     *   relative to `config` is not visible here — confirm.
     */
    constructor(config?: Partial<ScoringConfig>, indexConfigManager?: IndexConfigManager);
    /**
     * Clean and tokenize text for analysis
     * Works with any language - no hardcoded stop words
     */
    private cleanAndTokenize;
    /**
     * Calculate Jaccard similarity between two text strings
     *
     * Jaccard = |A ∩ B| / |A ∪ B|
     *
     * Returns value between 0 (no overlap) and 1 (identical)
     *
     * @param text1 - First text to compare.
     * @param text2 - Second text to compare.
     * @returns Similarity in the range 0 to 1.
     */
    calculateJaccard(text1: string, text2: string): number;
    /**
     * Calculate Shannon entropy for text
     *
     * H(X) = -Σ p(x) * log2(p(x))
     *
     * Measures information density/vocabulary richness
     * Higher entropy = more diverse vocabulary
     *
     * @param text - Text to analyse.
     * @returns Entropy value; the log2 in the formula above implies bits.
     */
    calculateEntropy(text: string): number;
    /**
     * Calculate combined relevance score using Jaccard and entropy
     *
     * Interprets the relationship between similarity and complexity
     *
     * @param text1 - First text to compare.
     * @param text2 - Second text to compare.
     * @returns A `ScoringResult` with the individual metrics, the combined
     *   score and an interpretation string.
     */
    scoreRelevance(text1: string, text2: string): ScoringResult;
    /**
     * Build a pairwise similarity matrix for multiple texts
     *
     * Useful for clustering and relationship discovery
     *
     * @param elements - Map of string keys to string values.
     *   NOTE(review): presumably element name -> element text — confirm.
     * @returns Nested map of string key -> string key -> `ScoringResult`.
     *   NOTE(review): whether both (a, b) and (b, a) entries, or self-pairs,
     *   are populated is not visible here.
     */
    buildSimilarityMatrix(elements: Map<string, string>): Map<string, Map<string, ScoringResult>>;
    /**
     * Find most similar elements to a given text
     *
     * @param targetText - Text to compare each candidate against.
     * @param candidates - Map of string keys to string values.
     *   NOTE(review): presumably candidate name -> candidate text — confirm.
     * @param topK - Optional limit on the number of results.
     *   NOTE(review): the default value is not visible in this declaration.
     * @returns Array of `{ name, score }` entries. NOTE(review): presumably
     *   ordered from most to least similar — confirm.
     */
    findSimilar(targetText: string, candidates: Map<string, string>, topK?: number): Array<{
        name: string;
        score: ScoringResult;
    }>;
    /**
     * Extract key terms from text based on entropy contribution
     *
     * Terms that contribute most to entropy are likely important
     *
     * @param text - Text to extract terms from.
     * @param topK - Optional limit on the number of terms returned.
     *   NOTE(review): the default value is not visible in this declaration.
     * @returns The selected terms as strings.
     */
    extractKeyTerms(text: string, topK?: number): string[];
    /**
     * Add result to cache with LRU eviction
     */
    private addToCache;
    /**
     * Update access order for LRU tracking
     */
    private updateAccessOrder;
    /**
     * Clean expired cache entries
     */
    private cleanExpiredCache;
    /**
     * Clear the cache
     */
    clearCache(): void;
    /**
     * Get cache statistics
     *
     * @returns An object with `size` and `oldestEntry`. NOTE(review):
     *   `oldestEntry` is presumably a timestamp or age of the oldest cached
     *   item and `null` when the cache is empty — units/meaning not visible
     *   here, confirm.
     */
    getCacheStats(): {
        size: number;
        oldestEntry: number | null;
    };
    /**
     * Dispose of this manager.
     *
     * NOTE(review): the implementation is not visible; presumably this stops
     * the periodic cleanup held in `cleanupInterval` and releases the cache —
     * confirm before relying on it.
     */
    dispose(): void;
}
//# sourceMappingURL=NLPScoringManager.d.ts.map