/**
 * @module botbuilder-choices
 */
/**
 * Copyright (c) Microsoft Corporation. All rights reserved.  
 * Licensed under the MIT License.
 */

import { Token, TokenizerFunction, defaultTokenizer } from './tokenizer';
import { ModelResult } from './modelResult';

/**
 * :package: **botbuilder-choices**
 * 
 * Basic search options used to control how choices are recognized in a users utterance. 
 */
export interface FindValuesOptions {
    /** 
     * (Optional) if true, then only some of the tokens in a value need to exist to be considered 
     * a match. The default value is "false".
     */
    allowPartialMatches?: boolean;

    /**
     * (Optional) locale/culture code of the utterance. The default is `en-US`.
     */
    locale?: string;

    /** 
     * (Optional) maximum tokens allowed between two matched tokens in the utterance. So with
     * a max distance of 2 the value "second last" would match the utterance "second from the last"
     * but it wouldn't match "Wait a second. That's not the last one is it?". 
     * The default value is "2".  
     */
    maxTokenDistance?: number;

    /**
     * (Optional) tokenizer to use when parsing the utterance and values being recognized.  
     */
    tokenizer?: TokenizerFunction;
}

/**
 * :package: **botbuilder-choices**
 * 
 * INTERNAL: Raw search result returned by `findValues()`.
 */
export interface FoundValue {
    /** 
     * The value that was matched. 
     */
    value: string;

    /**
     * The index of the value that was matched.
     */
    index: number;
    
    /** 
     * The accuracy with which the value matched the specified portion of the utterance. A
     * value of 1.0 would indicate a perfect match.
     */
    score: number;
}

/** 
 * :package: **botbuilder-choices**
 * 
 * INTERNAL: A value that can be sorted and still refer to its original position within a source 
 * array. The `findChoices()` function expands the passed in choices to individual `SortedValue`
 * instances and passes them to `findValues()`. Each individual `Choice` can result in multiple
 * synonyms that should be searched for so this data structure lets us pass each synonym as a value
 * to search while maintaining the index of the choice that value came from. 
 */
export interface SortedValue {
    /** The value that will be sorted. */
    value: string;

    /** The values original position within its unsorted array. */
    index: number;
}

/** 
 * :package: **botbuilder-choices**
 * 
 * INTERNAL: Low-level function that searches for a set of values within an utterance. Higher level 
 * functions like `findChoices()` and `recognizeChoices()` are layered above this function.  In most 
 * cases its easier to just call one of the higher level functions instead but this function contains 
 * the fuzzy search algorithm that drives choice recognition.
 * @param utterance The text or user utterance to search over.
 * @param values List of values to search over.
 * @param options (Optional) options used to tweak the search that's performed. 
 */
export function findValues(utterance: string, values: SortedValue[], options?: FindValuesOptions): ModelResult<FoundValue>[] {
    function indexOfToken(token: Token, startPos: number): number {
        for (let i = startPos; i < tokens.length; i++) {
            if (tokens[i].normalized === token.normalized) {
                return i;
            }
        }
        return -1;
    }

    function matchValue(index: number, value: string, vTokens: Token[], startPos: number): ModelResult<FoundValue>|undefined {
        // Match value to utterance and calculate total deviation.
        // - The tokens are matched in order so "second last" will match in 
        //   "the second from last one" but not in "the last from the second one".
        // - The total deviation is a count of the number of tokens skipped in the 
        //   match so for the example above the number of tokens matched would be
        //   2 and the total deviation would be 1. 
        let matched = 0;
        let totalDeviation = 0;
        let start = -1;
        let end = -1;
        vTokens.forEach((token) => {
            // Find the position of the token in the utterance.
            const pos = indexOfToken(token, startPos);
            if (pos >= 0) {
                // Calculate the distance between the current tokens position and the previous tokens distance.
                const distance = matched > 0 ? pos - startPos : 0;
                if (distance <= maxDistance) {
                    // Update count of tokens matched and move start pointer to search for next token after
                    // the current token.
                    matched++;
                    totalDeviation += distance;
                    startPos = pos + 1;

                    // Update start & end position that will track the span of the utterance that's matched.
                    if (start < 0) { start = pos }
                    end = pos;
                }
            }
        });

        // Calculate score and format result
        // - The start & end positions and the results text field will be corrected by the caller.
        let result: ModelResult<FoundValue>|undefined;
        if (matched > 0 && (matched == vTokens.length || opt.allowPartialMatches)) {
            // Percentage of tokens matched. If matching "second last" in 
            // "the second from last one" the completeness would be 1.0 since
            // all tokens were found.
            const completeness = matched / vTokens.length;

            // Accuracy of the match. The accuracy is reduced by additional tokens
            // occurring in the value that weren't in the utterance. So an utterance
            // of "second last" matched against a value of "second from last" would
            // result in an accuracy of 0.5. 
            const accuracy = (matched / (matched + totalDeviation))

            // The final score is simply the completeness multiplied by the accuracy.
            const score = completeness * accuracy;

            // Format result
            result = {
                start: start,
                end: end,
                typeName: 'value',
                resolution: {
                    value: value,
                    index: index,
                    score: score
                } 
            } as ModelResult<FoundValue>;
        }
        return result;
    }

    // Sort values in descending order by length so that the longest value is searched over first.
    const list = values.sort((a, b) => b.value.length - a.value.length);
    
    // Search for each value within the utterance.
    let matches: ModelResult<FoundValue>[] = [];
    const opt = options || {};
    const tokenizer = (opt.tokenizer || defaultTokenizer);
    const tokens = tokenizer(utterance, opt.locale);
    const maxDistance = opt.maxTokenDistance !== undefined ? opt.maxTokenDistance : 2;
    list.forEach((entry, index) => {
        // Find all matches for a value
        // - To match "last one" in "the last time I chose the last one" we need 
        //   to re-search the string starting from the end of the previous match.
        // - The start & end position returned for the match are token positions.
        let startPos = 0;
        const vTokens = tokenizer(entry.value.trim(), opt.locale);
        while (startPos < tokens.length) {
            const match = matchValue(entry.index, entry.value, vTokens, startPos);
            if (match) {
                startPos = match.end + 1;
                matches.push(match);
            } else {
                break;
            }
        }
    });

    // Sort matches by score descending
    matches = matches.sort((a,b) => b.resolution.score - a.resolution.score);

    // Filter out duplicate matching indexes and overlapping characters.
    // - The start & end positions are token positions and need to be translated to 
    //   character positions before returning. We also need to populate the "text"
    //   field as well. 
    const results: ModelResult<FoundValue>[] = [];
    const foundIndexes: { [index: number]: boolean } = {};
    const usedTokens: { [index: number]: boolean } = {};
    matches.forEach((match) => {
        // Apply filters
        let add = !foundIndexes.hasOwnProperty(match.resolution.index);
        for (let i = match.start; i <= match.end; i++) {
            if (usedTokens[i]) {
                add = false;
                break;
            }
        }

        // Add to results
        if (add) {
            // Update filter info
            foundIndexes[match.resolution.index] = true;
            for (let i = match.start; i <= match.end; i++) { usedTokens[i] = true }

            // Translate start & end and populate text field
            match.start = tokens[match.start].start;
            match.end = tokens[match.end].end;
            match.text = utterance.substring(match.start, match.end + 1);
            results.push(match);
        }
    });

    // Return the results sorted by position in the utterance
    return results.sort((a,b) => a.start - b.start);
}
