/* eslint-disable no-underscore-dangle */
import { GbsSyntaxError, GbsWarning } from './exceptions';
import { Input, MultifileReader, SourceReader } from './reader';
import {
    T_AND,
    T_ARROW,
    T_ASSIGN,
    T_CASE,
    T_CHOOSE,
    T_COMMA,
    T_CONCAT,
    T_DIV,
    T_ELLIPSIS,
    T_ELSE,
    T_ELSEIF,
    T_EOF,
    T_EQ,
    T_FIELD,
    T_FOREACH,
    T_FUNCTION,
    T_GE,
    T_GETS,
    T_GT,
    T_IF,
    T_IN,
    T_INTERACTIVE,
    T_IS,
    T_LBRACE,
    T_LBRACK,
    T_LE,
    T_LET,
    T_LOWERID,
    T_LPAREN,
    T_LT,
    T_MATCHING,
    T_MINUS,
    T_MOD,
    T_NE,
    T_NOT,
    T_NUM,
    T_ON,
    T_OR,
    T_OTHERWISE,
    T_PIPE,
    T_PLUS,
    T_POW,
    T_PROCEDURE,
    T_PROGRAM,
    T_RANGE,
    T_RBRACE,
    T_RBRACK,
    T_RECORD,
    T_REPEAT,
    T_RETURN,
    T_RPAREN,
    T_SELECT,
    T_SEMICOLON,
    T_STRING,
    T_SWITCH,
    T_THEN,
    T_TIMEOUT,
    T_TIMES,
    T_TO,
    T_TYPE,
    T_UNDERSCORE,
    T_UPPERID,
    T_VARIANT,
    T_WHEN,
    T_WHILE,
    Token
} from './token';

import { ASTDef } from './ast';
import { i18n } from './i18n';

const isWhitespace = (chr: string): boolean =>
    chr === ' ' || chr === '\t' || chr === '\r' || chr === '\n';

const isDigit = (chr: string): boolean => chr >= '0' && chr <= '9';

/* We define a character to be alphabetic if it has two distinct forms:
 * an uppercase form and a lowercase form.
 *
 * This accepts alphabetic Unicode characters but rejects numbers and symbols.
 */
const isAlpha = (chr: string): boolean => chr.toUpperCase() !== chr.toLowerCase();

/* An uppercase character is an alphabetic character that coincides with
 * its uppercase form */
const isUpper = (chr: string): boolean => isAlpha(chr) && chr.toUpperCase() === chr;

/* A lowercase character is an alphabetic character that coincides with
 * its lowercase form */
const isLower = (chr: string): boolean => isAlpha(chr) && chr.toLowerCase() === chr;

const isIdent = (chr: string): boolean =>
    isAlpha(chr) || isDigit(chr) || chr === '_' || chr === "'";

const KEYWORDS = {
    program: T_PROGRAM,
    interactive: T_INTERACTIVE,
    procedure: T_PROCEDURE,
    function: T_FUNCTION,
    return: T_RETURN,
    /* Control structures */
    if: T_IF,
    then: T_THEN,
    elseif: T_ELSEIF,
    else: T_ELSE,
    choose: T_CHOOSE,
    when: T_WHEN,
    otherwise: T_OTHERWISE,
    repeat: T_REPEAT,
    foreach: T_FOREACH,
    in: T_IN,
    while: T_WHILE,
    switch: T_SWITCH,
    to: T_TO,
    matching: T_MATCHING,
    select: T_SELECT,
    on: T_ON,
    /* Assignment */
    let: T_LET,
    /* Operators */
    not: T_NOT,
    div: T_DIV,
    mod: T_MOD,
    /* Records/variants */
    type: T_TYPE,
    is: T_IS,
    record: T_RECORD,
    variant: T_VARIANT,
    case: T_CASE,
    field: T_FIELD,
    /* Default case in a switch/match */
    _: T_UNDERSCORE
};

/* Pattern for timeouts in an interactive program */
// eslint-disable-next-line @typescript-eslint/ban-types
const i: string | Function = i18n('CONS:TIMEOUT');
const t: string = typeof i === 'string' ? i : '';
KEYWORDS[t] = T_TIMEOUT;

/* Note: the order is relevant so that the 'maximal munch' rule applies. */
const SYMBOLS = [
    /* Various delimiters */
    { symbol: '(', tag: T_LPAREN },
    { symbol: ')', tag: T_RPAREN },
    { symbol: '{', tag: T_LBRACE },
    { symbol: '}', tag: T_RBRACE },
    { symbol: '[', tag: T_LBRACK }, // For lists and ranges
    { symbol: ']', tag: T_RBRACK },
    { symbol: ',', tag: T_COMMA },
    { symbol: ';', tag: T_SEMICOLON },
    { symbol: '...', tag: T_ELLIPSIS }, // For intentionally incomplete programs
    /* Range operator */
    { symbol: '..', tag: T_RANGE },
    /* Assignment */
    { symbol: ':=', tag: T_ASSIGN },
    /* Logical operators */
    { symbol: '&&', tag: T_AND },
    { symbol: '||', tag: T_OR },
    /* Fields */
    { symbol: '<-', tag: T_GETS }, // Field initializer, e.g. Coord(x <- 1, y <- 2)
    { symbol: '|', tag: T_PIPE }, // Field update, e.g. Coord(c | x <- 2)
    /* Pattern matching */
    { symbol: '->', tag: T_ARROW }, // For the branches of a switch
    /* Relational operators */
    { symbol: '==', tag: T_EQ },
    { symbol: '/=', tag: T_NE },
    { symbol: '<=', tag: T_LE },
    { symbol: '>=', tag: T_GE },
    { symbol: '<', tag: T_LT },
    { symbol: '>', tag: T_GT },
    /* Functions */
    { symbol: '++', tag: T_CONCAT },
    { symbol: '+', tag: T_PLUS },
    { symbol: '-', tag: T_MINUS },
    { symbol: '*', tag: T_TIMES },
    { symbol: '^', tag: T_POW }
];

/* Valid language options accepted by the LANGUAGE pragma */
const LANGUAGE_OPTIONS = ['DestructuringForeach', 'AllowRecursion'];

const leadingZeroes = (string: string): boolean => string.length >= 0 && string[0] === '0';

// eslint-disable-next-line @typescript-eslint/explicit-function-return-type
function fail(startPos: SourceReader, endPos: SourceReader, reason: string, args: any[]) {
    throw new GbsSyntaxError(startPos, endPos, reason, args);
}

const CLOSING_DELIMITERS = {
    '(': ')',
    '[': ']',
    '{': '}'
};

/* An instance of Lexer scans source code for tokens.
 * Example:
 *
 *     let tok = new Lexer('if (a)');
 *     tok.nextToken(); // ~~> new Token(T_IF, null, ...)
 *     tok.nextToken(); // ~~> new Token(T_LPAREN, null, ...)
 *     tok.nextToken(); // ~~> new Token(T_LOWERID, 'a', ...)
 *     tok.nextToken(); // ~~> new Token(T_RPAREN, null, ...)
 *     tok.nextToken(); // ~~> new Token(T_EOF, null, ...)
 *
 * The 'input' parameter is either a string or a mapping
 * from filenames to strings.
 */
export class Lexer {
    private _multifileReader: MultifileReader;
    private _reader: SourceReader;
    private _warnings: GbsWarning[];
    private _delimiterStack: any[];
    private _pendingAttributes: Record<string, ASTDef>;
    private _languageOptions: string[];

    public constructor(input: Input) {
        this._multifileReader = new MultifileReader(input);
        this._reader = this._multifileReader.readCurrentFile();
        this._warnings = [];

        /* A stack of tokens '(', '[' and '{', to provide more helpful
         * error reporting if delimiters are not balanced. */
        this._delimiterStack = [];

        /* A dictionary of pending attributes, set by the ATTRIBUTE pragma.
         * Pending attributes are used by the parser to decorate any procedure
         * or function definition. */
        this._pendingAttributes = {};

        /* A list of language options, enabled by the LANGUAGE pragma.
         * Language options are interpreted by the runner to initialize.
         * the remaining modules (linter, compiler, runtime, ...)
         * accordingly. */
        this._languageOptions = [];
    }

    /* Return the next token from the input */
    public nextToken(): Token {
        if (!this._findNextToken()) {
            const token = new Token(T_EOF, undefined, this._reader, this._reader);
            this._checkBalancedDelimiters(token);
            return token;
        }
        if (isDigit(this._reader.peek())) {
            const startPos = this._reader;
            const value = this._readStringWhile(isDigit);
            const endPos = this._reader;
            if (leadingZeroes(value) && value.length > 1) {
                fail(startPos, endPos, 'numeric-constant-should-not-have-leading-zeroes', []);
            }
            return new Token(T_NUM, value, startPos, endPos);
        } else if (isIdent(this._reader.peek())) {
            const startPos = this._reader;
            const value = this._readStringWhile(isIdent);
            const endPos = this._reader;
            if (value in KEYWORDS) {
                return new Token(KEYWORDS[value], value, startPos, endPos);
            } else if (isUpper(value[0])) {
                return new Token(T_UPPERID, value, startPos, endPos);
            } else if (isLower(value[0])) {
                return new Token(T_LOWERID, value, startPos, endPos);
            } else {
                fail(startPos, endPos, 'identifier-must-start-with-alphabetic-character', []);
            }
        } else if (this._reader.peek() === '"') {
            return this._readStringConstant();
        } else {
            return this._readSymbol();
        }
    }

    /* When tokenization is done, this function returns the list of all
     * the warnings collected during tokenization */
    public warnings(): GbsWarning[] {
        return this._warnings;
    }

    /* Skip whitespace and advance through files until the start of the next
     * token is found. Return false if EOF is found. */
    public _findNextToken(): boolean {
        for (;;) {
            this._ignoreWhitespaceAndComments();
            if (!this._reader.eof()) {
                break;
            }
            if (this._multifileReader.moreFiles()) {
                this._multifileReader.nextFile();
                this._reader = this._multifileReader.readCurrentFile();
            } else {
                return false;
            }
        }
        return true;
    }

    /* Read a string while the given condition holds for the current
     * character */
    // eslint-disable-next-line @typescript-eslint/ban-types
    public _readStringWhile(condition: Function): string {
        const result = [];
        while (!this._reader.eof()) {
            if (!condition(this._reader.peek())) {
                break;
            }
            result.push(this._reader.peek());
            this._reader = this._reader.consumeCharacter();
        }
        return result.join('');
    }

    /* Reads a quote-delimited string constant.
     * Escapes are recognized. */
    public _readStringConstant(): Token {
        const startPos = this._reader;
        const result = [];
        this._reader = this._reader.consumeCharacter();
        while (!this._reader.eof()) {
            const c = this._reader.peek();
            if (c === '"') {
                this._reader = this._reader.consumeCharacter();
                return new Token(T_STRING, result.join(''), startPos, this._reader);
            } else if (c === '\\') {
                this._reader = this._reader.consumeCharacter();
                if (this._reader.eof()) {
                    break;
                }
                const c2 = this._reader.peek();
                this._reader = this._reader.consumeCharacter();
                switch (c2) {
                    case 'a':
                        result.push('\u0007');
                        break;
                    case 'b':
                        result.push('\u0008');
                        break;
                    case 'f':
                        result.push('\u000c');
                        break;
                    case 'n':
                        result.push('\u000a');
                        break;
                    case 'r':
                        result.push('\u000d');
                        break;
                    case 't':
                        result.push('\u0009');
                        break;
                    case 'v':
                        result.push('\u000b');
                        break;
                    default:
                        result.push(c2);
                        break;
                }
            } else {
                result.push(c);
                this._reader = this._reader.consumeCharacter();
            }
        }
        fail(startPos, this._reader, 'unclosed-string-constant', []);
    }

    /* Read a symbol */
    public _readSymbol(): Token {
        for (const { symbol, tag } of SYMBOLS) {
            if (this._reader.startsWith(symbol)) {
                const startPos = this._reader;
                this._reader = this._reader.consumeString(symbol);
                const endPos = this._reader;
                const token = new Token(tag, symbol, startPos, endPos);
                this._checkBalancedDelimiters(token);
                return token;
            }
        }
        fail(this._reader, this._reader, 'unknown-token', [this._reader.peek()]);
    }

    public _ignoreWhitespaceAndComments(): void {
        while (this._ignoreWhitespace() || this._ignoreComments()) {
            /* continue */
        }
    }

    public _ignoreWhitespace(): boolean {
        if (!this._reader.eof() && isWhitespace(this._reader.peek())) {
            this._reader = this._reader.consumeCharacter();
            return true;
        } else {
            return false;
        }
    }

    /* Skips comments and pragmas, returns false if there are no comments */
    public _ignoreComments(): boolean {
        if (this._startSingleLineComment()) {
            this._ignoreSingleLineComment();
            return true;
        } else if (this._reader.startsWith('/*@')) {
            const startPos = this._reader;
            this._evaluatePragma(startPos, this._readInvisiblePragma('/*', '*/', '@'));
            return true;
        } else if (this._reader.startsWith('{-')) {
            this._ignoreMultilineComment('{-', '-}');
            return true;
        } else if (this._reader.startsWith('/*')) {
            this._ignoreMultilineComment('/*', '*/');
            return true;
        } else {
            return false;
        }
    }

    /* Returns true if a single-line comment starts here */
    public _startSingleLineComment(): boolean {
        return (
            this._reader.startsWith('--') ||
            this._reader.startsWith('//') ||
            this._reader.startsWith('#')
        );
    }

    /* Skips a single-line comment */
    public _ignoreSingleLineComment(): void {
        while (!this._reader.eof()) {
            this._reader = this._reader.consumeCharacter();
            if (this._reader.peek() === '\n') {
                break;
            }
        }
    }

    /* Skips a multiline comment with the given left/right delimiters.
     * Multi-line comments may be nested. */
    public _ignoreMultilineComment(left: string, right: string): void {
        let nesting = 0;
        const startPos = this._reader;
        while (!this._reader.eof()) {
            if (this._reader.startsWith(left)) {
                this._reader = this._reader.consumeString(left);
                nesting++;
            } else if (this._reader.startsWith(right)) {
                this._reader = this._reader.consumeString(right);
                nesting--;
                if (nesting === 0) {
                    return;
                }
            } else {
                this._reader = this._reader.consumeCharacter();
            }
        }
        fail(startPos, this._reader, 'unclosed-multiline-comment', []);
    }

    /* Read a pragma. A pragma is a comment delimited by the
     * given left   / *
     * and right    * /
     * comment delimiters.
     * It has N >= 0 parts delimited by the pragma delimiter   @
     *   @part1@part2@...@partN@
     */
    public _readInvisiblePragma(left: string, right: string, pragmaDelim: string): string[] {
        const pragma = [];
        const startPos = this._reader;
        this._reader = this._reader.consumeInvisibleString(left);
        this._reader = this._reader.consumeInvisibleString(pragmaDelim);
        while (!this._reader.eof()) {
            pragma.push(this._readInvisibleStringUntilDelimiter(pragmaDelim));
            this._reader = this._reader.consumeInvisibleString(pragmaDelim);
            if (this._reader.startsWith(right)) {
                this._reader = this._reader.consumeInvisibleString(right);
                return pragma;
            }
        }
        fail(startPos, this._reader, 'unclosed-multiline-comment', []);
    }

    /* Read an invisible string until the given delimiter is found */
    public _readInvisibleStringUntilDelimiter(delimiter: string): string {
        const startPos = this._reader;
        const result = [];
        while (!this._reader.eof()) {
            if (this._reader.peek() === delimiter) {
                return result.join('');
            }
            result.push(this._reader.peek());
            this._reader = this._reader.consumeInvisibleCharacter();
        }
        fail(startPos, this._reader, 'unclosed-multiline-comment', []);
    }

    public _evaluatePragma(startPos: SourceReader, pragma: string[]): void {
        if (pragma.length === 0) {
            this._emitWarning(startPos, this._reader, 'empty-pragma', []);
        } else if (pragma[0] === 'BEGIN_REGION') {
            const region = pragma[1];
            this._reader = this._reader.beginRegion(region);
        } else if (pragma[0] === 'END_REGION') {
            this._reader = this._reader.endRegion();
        } else if (pragma[0] === 'ATTRIBUTE' && pragma.length >= 2) {
            const key = pragma[1];
            const value = pragma.slice(2, pragma.length).join('@');
            this.setAttribute(key, value);
        } else if (pragma[0] === 'LANGUAGE' && pragma.length === 2) {
            const languageOption = pragma[1];
            this.addLanguageOption(languageOption);
        } else {
            this._emitWarning(startPos, this._reader, 'unknown-pragma', [pragma[0]]);
        }
    }

    public _emitWarning(
        startPos: SourceReader,
        endPos: SourceReader,
        reason: string,
        args: any[]
    ): void {
        this._warnings.push(new GbsWarning(startPos, endPos, reason, args));
    }

    /* Check that reading a delimiter keeps the delimiter stack balanced. */
    public _checkBalancedDelimiters(token: Token): void {
        if (token.tag === T_EOF && this._delimiterStack.length > 0) {
            const openingDelimiter = this._delimiterStack.pop();
            fail(
                openingDelimiter.startPos,
                openingDelimiter.endPos,
                'unmatched-opening-delimiter',
                [openingDelimiter.value]
            );
        } else if (token.tag === T_LPAREN || token.tag === T_LBRACE || token.tag === T_LBRACK) {
            this._delimiterStack.push(token);
        } else if (token.tag === T_RPAREN || token.tag === T_RBRACE || token.tag === T_RBRACK) {
            if (this._delimiterStack.length === 0) {
                fail(token.startPos, token.endPos, 'unmatched-closing-delimiter', [token.value]);
            }
            const openingDelimiter = this._delimiterStack.pop();
            if (CLOSING_DELIMITERS[openingDelimiter.value] !== token.value) {
                fail(
                    openingDelimiter.startPos,
                    openingDelimiter.endPos,
                    'unmatched-opening-delimiter',
                    [openingDelimiter.value]
                );
            }
        }
    }

    /*
     * Interface for handling attributes.
     *
     * The pragma ATTRIBUTE@key@value
     * establishes the attribute given by <key> to <value>.
     *
     * Whenever the parser finds a definition of the following kinds:
     *   procedure
     *   function
     *   program
     *   interactive program
     *   type
     * it gets decorated with the pending attributes.
     */
    public getPendingAttributes(): Record<string, ASTDef> {
        const a = this._pendingAttributes;
        this._pendingAttributes = {};
        return a;
    }

    // eslint-disable-next-line @typescript-eslint/explicit-module-boundary-types
    public setAttribute(key: string, value: any): void {
        this._pendingAttributes[key] = value;
    }

    /*
     * Interface for handling language options.
     *
     * The pragma LANGUAGE@option sets the given option.
     *
     * The runner module reads these options to initialize the
     * linter/compiler/runtime.
     */
    public getLanguageOptions(): string[] {
        return this._languageOptions;
    }

    public addLanguageOption(option: string): void {
        if (LANGUAGE_OPTIONS.indexOf(option) !== -1) {
            this._languageOptions.push(option);
        } else {
            fail(this._reader, this._reader, 'unknown-language-option', [option]);
        }
    }
}
