core/parser.js - MathLive Docs

            
            
/**
 * @private
 */

define(['mathlive/core/definitions', 
    'mathlive/core/color', 
    'mathlive/core/fontMetrics', 
    'mathlive/core/lexer', 
    'mathlive/core/mathAtom'],
    function(Definitions, Color, FontMetrics, Lexer, MathAtomModule) {

const MathAtom = MathAtomModule.MathAtom;

/**
 * A parser transforms a list of tokens into a list of MathAtom.
 *
 * @param {InputToken[]} tokens - An array of tokens generated by the lexer.
 * @param {Object} [args] - An optional list of arguments. `#n` tokens will be
 * substituted with the corresponding element in the args array. This is used
 * when parsing macros.
 * @param {Object} [macros] - Optional macro definitions.
 * @class Parser
 * @global
 * @property {InputToken[]} tokens - An array of tokens generated by the lexer.
 * @property {Object} args - Optional arguments to substitute the `#` token.
 * @property {Object} macros - A dictionary of objects, indexed by the name of
 * the macro, with the following keys:
 *    * args: an integer, the number of arguments, default 0. They can be
 *       referenced as #0, #1, #2... inside the definition of the macro
 *    * def: a string, the definition of the macro, which can reference
 *       other macros
 * @property {number} index - The current token to be parsed: index in
 * `this.tokens`
 * @property {MathAtom[]} mathList - Accumulated result of the parsing by
 * `parseAtom()`
 * @property {string} parseMode - The parse mode indicates the syntax rules to
 * use to parse the upcoming tokens. Initially `'math'`.
 *  Valid values include:
 *  - `'math'`: spaces are ignored, math functions are allowed
 *  - `'text'`: spaces are accounted for, math functions are ignored
 *  - `'string'`
 *  - `'color'`: color name, hex value: `'#fff'`, `'#a0a0a0'`
 *  - `'number'`: `+/-12.56`
 *  - `'dimen'`: `'25mu'`, `'2pt'`
 *  - `'skip'`: `'25mu plus 2em minus fiLll'`, `'2pt'`
 *  - `'colspec'`: formatting of a column in tabular environment,
 *    e.g. `'r@{.}l'`
 * @property {boolean} tabularMode - When in tabular mode, `'&'` is interpreted
 *  as a column separator and `'\'` as a row separator. Used for matrices,
 *  etc... Initially `false`.
 * @property {number} endCount - Counter to prevent deadlock. If `end()` is
 * called too many times (1,000) in a row for the same token, bail.
 * @private
 */
function Parser(tokens, args, macros) {
    // All state is installed in one go; the key order matches the order in
    // which the properties have always been created.
    Object.assign(this, {
        tokens,
        index: 0,
        args,
        macros,
        mathList: [],
        parseMode: 'math',
        tabularMode: false,
        endCount: 0,
    });
}

/**
 * Install a new math list and hand back the one that was current.
 * @param {MathAtom[]} [newMathList] - The list to install. When falsy, a
 * fresh empty list is installed instead.
 * @return {MathAtom[]} The math list that was current before the call.
 * @method Parser#swapMathList
 * @private
 */
Parser.prototype.swapMathList = function(newMathList) {
    const previous = this.mathList;
    this.mathList = newMathList ? newMathList : [];
    return previous;
};

/**
 * Switch to another parse mode and hand back the one that was current.
 * @param {string} mode - The parse mode to install (stored as is).
 * @return {string} The parse mode that was current before the call.
 * @method Parser#swapParseMode
 * @private
 */
Parser.prototype.swapParseMode = function(mode) {
    const { parseMode: previous } = this;
    this.parseMode = mode;
    return previous;
};

/**
 * True if we've reached the end of the token stream.
 *
 * Every call bumps `endCount`; `get()` resets it. If `end()` has been called
 * more than 1,000 times since the last reset, something is presumed broken
 * (a loop that never consumes a token) and the stream is reported as
 * finished to prevent a deadlock.
 * @return {boolean}
 * @method Parser#end
 * @private
 */
Parser.prototype.end = function() {
    this.endCount += 1;
    const exhausted = this.index >= this.tokens.length;
    const stalled = this.endCount > 1000;
    return exhausted || stalled;
};

/**
 * Consume and return the current token.
 *
 * Also resets `endCount`, the counter `end()` uses to detect a stalled
 * parser.
 * @return {InputToken} The token that was current, or `null` if the stream
 * is exhausted (in which case `index` is left untouched).
 * @method Parser#get
 * @private
 */
Parser.prototype.get = function() {
    this.endCount = 0;
    if (!(this.index < this.tokens.length)) return null;
    const token = this.tokens[this.index];
    this.index += 1;
    return token;
};

/**
 * Look at a token without consuming it.
 * @param {number} [offset=0] - Position relative to the current token:
 * `0` (or omitted) is the token `get()` would return next.
 * @return {InputToken} The token at that position, or `null` when the
 * position falls outside the token stream (on either side).
 * @method Parser#peek
 * @private
 */
Parser.prototype.peek = function(offset) {
    const index = this.index + (offset ? offset : 0);
    // A negative offset can point before the first token: report it the same
    // way as a position past the end, rather than leaking `undefined`.
    const inRange = index >= 0 && index < this.tokens.length;
    return inRange ? this.tokens[index] : null;
};

/**
 * Return the atom at the end of the math list that a subscript,
 * superscript or limits setting should be attached to.
 *
 * If the math list is empty, or if its last atom is not of type `'mop'`,
 * a new `'msubsup'` atom (a zero-width space, flagged `aria-hidden`) is
 * appended to the math list and returned. Otherwise the existing last atom
 * is returned.
 * @return {MathAtom}
 * @method Parser#lastMathAtom
 */
Parser.prototype.lastMathAtom = function() {
    const list = this.mathList;
    const tail = list.length > 0 ? list[list.length - 1] : null;
    if (tail !== null && tail.type === 'mop') return tail;

    // ZERO WIDTH SPACE
    const carrier = new MathAtom(this.parseMode, 'msubsup', '\u200b', 'main');
    carrier.attributes = {
        "aria-hidden": true
    };
    this.mathList.push(carrier);
    return this.mathList[this.mathList.length - 1];
};

/**
 * Check the type of the current token without consuming it.
 * @param {string} type
 * @return {boolean} True if the next token is of the specified type. False
 * when the stream is exhausted.
 * @method Parser#hasToken
 */
Parser.prototype.hasToken = function(type) {
    if (!(this.index < this.tokens.length)) return false;
    return this.tokens[this.index].type === type;
};

/**
 * Check whether the current token is a literal, without consuming it.
 * @param {string} [value]
 * @return {boolean} True if the next token is of type `'literal'` and has the
 * specified value. If `value` is empty (falsy), return true if the token is
 * of type `'literal'`, whatever its value. False when the stream is
 * exhausted.
 * @method Parser#hasLiteral
 */
Parser.prototype.hasLiteral = function(value) {
    if (!(this.index < this.tokens.length)) return false;
    const token = this.tokens[this.index];
    if (token.type !== 'literal') return false;
    return !value || token.value === value;
};

/**
 * Check whether the current token is a literal whose value matches a
 * pattern, without consuming it.
 * @param {RegExp} pattern
 * @return {boolean} True if the next token is of type `'literal'` and matches
 * the specified regular expression pattern.
 * @method Parser#hasLiteralPattern
 */
Parser.prototype.hasLiteralPattern = function(pattern) {
    if (!this.hasToken('literal')) return false;
    const literal = this.tokens[this.index];
    return pattern.test(literal.value);
};

/**
 * Check whether the current token is a given command, without consuming it.
 * @param {string} command - The command name, without its leading
 * backslash (e.g. `'frac'`). The single exception is `'\\'` itself, the
 * name of the `\\` command.
 * @return {boolean} True if the next token is of type `'command'` and has the
 * specified name. False when the stream is exhausted.
 * @method Parser#hasCommand
 */
Parser.prototype.hasCommand = function(command) {
    // Catch callers that pass '\\frac' instead of 'frac'
    console.assert(command === '\\' || command.charAt(0) !== '\\',
        'hasCommand() does not require a \\');

    if (!(this.index < this.tokens.length)) return false;
    const token = this.tokens[this.index];
    return token.type === 'command' && token.value === command;
};

/**
 * Check whether the current token is an infix command, without consuming it.
 *
 * A command is infix when the info returned for it by
 * `Definitions.getInfo()` (looked up in the current parse mode, with the
 * parser's macros) has a truthy `infix` property.
 * @return {boolean} Always a strict boolean, like the other `has...()`
 * predicates: `false` when the stream is exhausted, when the token is not a
 * command, or when the command is unknown or not infix.
 * @method Parser#hasInfixCommand
 */
Parser.prototype.hasInfixCommand = function() {
    const token = this.index < this.tokens.length ?
        this.tokens[this.index] : null;
    if (!token || token.type !== 'command') return false;

    const info = Definitions.getInfo('\\' + token.value,
        this.parseMode, this.macros);
    // Coerce: `info` may be missing and `info.infix` may be undefined
    return Boolean(info && info.infix);
};




/**
 * Check whether the current token is a column separator, without consuming
 * it.
 * @return {boolean} True if the parser is in tabular mode and the next token
 * is the literal `'&'`.
 * @method Parser#hasColumnSeparator
 */
Parser.prototype.hasColumnSeparator = function() {
    if (!(this.tabularMode && this.index < this.tokens.length)) return false;
    const token = this.tokens[this.index];
    return token.type === 'literal' && token.value === '&';
};

/**
 * Check whether the current token is a row separator, without consuming it.
 * @return {boolean} True if the parser is in tabular mode and the next token
 * is the command `\\` or the command `\cr`.
 * @method Parser#hasRowSeparator
 */
Parser.prototype.hasRowSeparator = function() {
    if (!(this.tabularMode && this.index < this.tokens.length)) return false;
    const token = this.tokens[this.index];
    if (token.type !== 'command') return false;
    return token.value === '\\' || token.value === 'cr';
};

/**
 * Consume the current token if it is a column separator
 * (see `hasColumnSeparator()`).
 * @return {boolean} True if a column separator was consumed.
 * @method Parser#parseColumnSeparator
 */
Parser.prototype.parseColumnSeparator = function() {
    if (!this.hasColumnSeparator()) return false;
    this.index += 1;
    return true;
};


/**
 * Return the appropriate value for a placeholder, either a default
 * one, or if a value was provided for #? via args, that value.
 *
 * A provided value is only used when `args['?']` is a string: it is then
 * tokenized and parsed in the current parse mode, with the parser's macros.
 * @return {MathAtom[]}
 * @method Parser#placeholder
 */
Parser.prototype.placeholder = function() {
    const hasOverride = Boolean(this.args) &&
        typeof this.args['?'] === 'string';
    if (!hasOverride) {
        // Default placeholder: displayed as a plain question mark
        return [new MathAtom(this.parseMode, 'placeholder', '?')];
    }
    // There is a specific value defined for the placeholder: use it.
    const tokens = Lexer.tokenize(this.args['?']);
    return parseTokens(tokens, this.parseMode, null, this.macros);
};


// Names (without the leading backslash) of the commands that change the
// size of the tokens following them, from smallest to largest.
const SIZING_COMMANDS = [
    'tiny',
    'scriptsize',
    'footnotesize',
    'small',
    'normalsize',
    'large',
    'Large',
    'LARGE',
    'huge',
    'Huge',
];

// Names (without the leading backslash) of the commands that change the
// math style of the tokens following them.
const MATHSTYLE_COMMANDS = [
    'displaystyle',
    'textstyle',
    'scriptstyle',
    'scriptscriptstyle',
];

/**
 * Check whether the current token is one of a set of commands, without
 * consuming it.
 * @param {string[]} commands - Command names, without the leading backslash
 * (e.g. `SIZING_COMMANDS`).
 * @return {boolean} True if the next token is of type `'command'` and its
 * name is included in `commands`.
 * @method Parser#hasImplicitCommand
 */
Parser.prototype.hasImplicitCommand = function(commands) {
    if (!(this.index < this.tokens.length)) return false;
    const { type, value } = this.tokens[this.index];
    return type === 'command' ? commands.includes(value) : false;
};



/**
 * Consume the current token if it is a row separator
 * (see `hasRowSeparator()`).
 * @return {boolean} True if a row separator was consumed.
 * @method Parser#parseRowSeparator
 */
Parser.prototype.parseRowSeparator = function() {
    if (!this.hasRowSeparator()) return false;
    this.index += 1;
    return true;
};


/**
 * Consume the current token if it is of the specified type.
 * @param {string} type
 * @return {boolean} True if a token was consumed.
 * @method Parser#parseToken
 */
Parser.prototype.parseToken = function(type) {
    if (!this.hasToken(type)) return false;
    this.index += 1;
    return true;
};

/**
 * Discard tokens up to and including the first token of the specified type.
 * If there is no such token, the rest of the stream is discarded.
 * @param {string} type
 * @method Parser#skipUntilToken
 */
Parser.prototype.skipUntilToken = function(type) {
    for (;;) {
        // Stop at the end of the stream, or once the sought token has been
        // consumed by parseToken()
        if (this.end() || this.parseToken(type)) return;
        this.get();
    }
};


/**
 * Consume the current token if it is the specified command.
 * @param {string} command - The command name, without a leading backslash
 * (see `hasCommand()`).
 * @return {boolean} True if the command was consumed.
 * @method Parser#parseCommand
 */
Parser.prototype.parseCommand = function(command) {
    if (!this.hasCommand(command)) return false;
    this.index += 1;
    return true;
};

/**
 * Consume the current token if it is a matching literal.
 * @param {string} [literal] - The expected value. When empty, any literal
 * matches (see `hasLiteral()`).
 * @return {boolean} True if a literal was consumed.
 * @method Parser#parseLiteral
 */
Parser.prototype.parseLiteral = function(literal) {
    if (!this.hasLiteral(literal)) return false;
    this.index += 1;
    return true;
};

/**
 * Skip any run of filler: `'space'` tokens and `\relax` commands, in any
 * order and any number.
 * @return {boolean} True if at least one token was skipped.
 * @method Parser#parseFiller
 */
Parser.prototype.parseFiller = function() {
    let skippedAny = false;
    for (;;) {
        // Each round tries a space, then a \relax; stop on the first round
        // that consumes neither.
        const gotSpace = this.parseToken('space');
        const gotRelax = this.parseCommand('relax');
        if (!gotSpace && !gotRelax) break;
        skippedAny = true;
    }
    return skippedAny;
};

/**
 * Keywords are used to specify dimensions, and for various other
 * syntactic constructs. Unlike commands, they are not case sensitive.
 * There are 25 keywords:
 * at by bp cc cm dd depth em ex fil fill filll height in minus
 * mm mu pc plus pt sp spread to true width
 *
 * The keyword is matched against the values of the upcoming `'literal'`
 * tokens. On a match those tokens are consumed; otherwise the parser is
 * rewound to where it was.
 *
 * TeX: 8212
 * @param {string} keyword
 * @return {boolean} true if the expected keyword is present
 * @method Parser#parseKeyword
 * @private
 */
Parser.prototype.parseKeyword = function(keyword) {
    const startIndex = this.index;

    // Accumulate literals until there are enough characters to compare, or
    // until a non-literal token or the end of the stream is met.
    let collected = '';
    let finished = this.end();
    while (!finished) {
        const token = this.get();
        const isLiteral = token.type === 'literal';
        if (isLiteral) collected += token.value;
        finished = this.end() || !isLiteral ||
            collected.length >= keyword.length;
    }

    if (keyword.toUpperCase() === collected.toUpperCase()) return true;

    // Not the keyword: give back everything that was read
    this.index = startIndex;
    return false;
};

/**
 * Return a sequence of characters as a string.
 * i.e. 'abcd' returns 'abcd'.
 *
 * Literals contribute their value and space tokens a single `' '`.
 * Commands are consumed too: one whose definition is of type `'mord'` or
 * `'textord'` and has a value contributes that value, any other command
 * contributes nothing.
 *
 * Terminates on the first other token encountered, e.g. '{', '}' etc...
 * Will also terminate on a literal ']'. The terminating token is not
 * consumed.
 * @return {string}
 * @method Parser#scanString
 * @private
 */
Parser.prototype.scanString = function() {
    let text = '';
    while (!this.end()) {
        if (this.hasLiteral(']')) break;

        if (this.hasToken('literal')) {
            text += this.get().value;

        } else if (this.parseToken('space')) {
            text += ' ';

        } else if (this.hasToken('command')) {
            // TeX will give a 'Missing \endcsname inserted' error
            // if it encounters any command when expecting a string.
            // We're a bit more lax.
            const command = this.get();
            const info = Definitions.getInfo('\\' + command.value,
                this.parseMode, this.macros);
            // If parseMode is 'math', info.type will never be 'textord'
            // Otherwise, info.type will never be 'mord'
            const isOrdinary = info &&
                (info.type === 'mord' || info.type === 'textord');
            if (isOrdinary && info.value) {
                text += info.value;
            }

        } else {
            break;
        }
    }

    return text;
};


/**
 * Scan a string (see `scanString()`) and convert it with
 * `Color.stringToColor()`.
 * @return {string} The value returned by `Color.stringToColor()`, presumably
 * a CSS color (#rrggbb) — confirm against the Color module.
 * @method Parser#scanColor
 * @private
 */
Parser.prototype.scanColor = function() {
    const colorSpec = this.scanString();
    return Color.stringToColor(colorSpec);
};

/**
 * Return as a number a group of characters representing a
 * numerical quantity.
 *
 * From TeX:8695 (scan_int):
 * An integer number can be preceded by any number of spaces and `\.+' or
 * `\.-' signs. Then comes either a decimal constant (i.e., radix 10), an
 * octal constant (i.e., radix 8, preceded by~\.\'), a hexadecimal constant
 * (radix 16, preceded by~\."), an alphabetic constant (preceded by~\.\`), or
 * an internal variable.
 *
 * What this implementation accepts: one optional sign, one optional space
 * token, an optional radix prefix (`'` for octal, `"` or `x` for
 * hexadecimal), digits, and, for a non-integer decimal, a fractional part
 * introduced by `.` or `,`. When no digit is present the result is `NaN`.
 * @param {boolean} [isInteger] - If truthy, do not scan a fractional part.
 * Octal and hexadecimal constants are always integers.
 * @return {number}
 * @method Parser#scanNumber
 * @private
 */
Parser.prototype.scanNumber = function(isInteger) {
    const isNegative = this.parseLiteral('-');

    // Optional (ignorable) '+' sign
    if (!isNegative) this.parseLiteral('+');
    this.parseToken('space');

    let integerOnly = !!isInteger;
    let radix = 10;
    let digitPattern = /[0-9]/;
    if (this.parseLiteral("'")) {
        // Apostrophe indicates an octal value
        radix = 8;
        digitPattern = /[0-7]/;
        integerOnly = true;
    } else if (this.parseLiteral('"') || this.parseLiteral('x')) {
        // Double-quote indicates a hex value
        // The 'x' prefix notation for the hexadecimal numbers is a MathJax
        // extension. For example: 'x3a'
        radix = 16;
        // Hex digits have to be upper-case
        digitPattern = /[0-9A-F]/;
        integerOnly = true;
    }

    // Consume a run of digits valid in the current radix
    const readDigits = () => {
        let run = '';
        while (this.hasLiteralPattern(digitPattern)) {
            run += this.get().value;
        }
        return run;
    };

    let text = readDigits();

    // Parse the fractional part, if applicable
    if (!integerOnly && (this.parseLiteral('.') || this.parseLiteral(','))) {
        text += '.' + readDigits();
    }

    const magnitude = integerOnly ? parseInt(text, radix) : parseFloat(text);
    return isNegative ? -magnitude : magnitude;
};


/**
 * Scan a dimension: a number followed by an optional unit, and return it
 * converted by `FontMetrics.toEm()`.
 *
 * The recognized units are pt, mm, cm, ex, px, em, bp, dd, pc, in and mu.
 * If the units are missing, TeX assumes 'pt' and so do we.
 *
 * NOTE(review): this comment used to state the result is "in pt (1 em =
 * 10 pt)", but the conversion helper is named `toEm` — confirm the unit of
 * the returned value against FontMetrics.
 *
 * See TeX:8831
 * @todo: note that some units depend on the font (em, ex). So it might be
 * better to return a dimen struct with the value + unit and resolve
 * later when we have a font context....
 * @return {number}
 * @method Parser#scanDimen
 * @private
 */
Parser.prototype.scanDimen = function() {
    const value = this.scanNumber(false);

    this.parseToken('space');

    // Units are probed in this order; the first keyword present wins
    const units = ['pt', 'mm', 'cm', 'ex', 'px', 'em', 'bp', 'dd', 'pc',
        'in', 'mu'];
    const unit = units.find(candidate => this.parseKeyword(candidate));

    // If the units are missing, TeX assumes 'pt'
    return FontMetrics.toEm(value, unit || 'pt');
};

/**
 * Scan a skip (glue): a dimension optionally followed by `plus <dimen>`
 * and then by `minus <dimen>` ('minus' cannot come before 'plus').
 *
 * The optional 'plus' and 'minus' components are parsed, but ignored.
 * @todo there could also be a \hFilLlL command instead of the dimension
 * after 'plus' or 'minus'
 * @return {number} The leading dimension, as returned by `scanDimen()`.
 * @method Parser#scanSkip
 * @private
 */
Parser.prototype.scanSkip = function() {
    const natural = this.scanDimen();

    // Stretch ('plus') first, then shrink ('minus'); each may be preceded
    // by a space and its dimension is scanned and discarded.
    for (const keyword of ['plus', 'minus']) {
        this.parseToken('space');
        if (this.parseKeyword(keyword)) {
            this.scanDimen();
        }
    }

    return natural;
};


/**
 * Scan the column specification of a tabular environment, e.g. `r@{.}l`.
 *
 * Scanning stops, without consuming it, at a `'}'` token or a literal `']'`.
 * Each recognized literal adds one entry to the result:
 *  - `l`, `c`, `r`: `{align: <the letter>}`
 *  - `|`: `{rule: true}`
 *  - `@` followed by a `{...}` group: `{gap: <atoms of the group, parsed in
 *    math mode>}`
 *
 * Other literals are ignored, and so are spaces between entries. Any other
 * token (a command, a `'{'`...) cannot be part of a column specification:
 * scanning stops there and the token is left for the caller.
 * @return {Object[]}
 * @method Parser#scanColspec
 * @private
 */
Parser.prototype.scanColspec = function() {
    this.parseToken('space');
    const result = [];
    while (!this.end() && !(this.hasToken('}') || this.hasLiteral(']'))) {
        if (this.hasLiteral()) {
            const literal = this.get().value;
            if ('lcr'.includes(literal)) {
                result.push({align: literal});
            } else if (literal === '|') {
                result.push({rule: true});
            } else if (literal === '@') {
                if (this.parseToken('{')) {
                    const savedParsemode = this.swapParseMode('math');
                    result.push({gap: this.scanImplicitGroup(
                        token => token.type === '}')});
                    this.swapParseMode(savedParsemode);
                }
                this.parseToken('}');
            }
        } else if (!this.parseToken('space')) {
            // Neither a literal nor a space: nothing here would ever consume
            // this token, so stop rather than spin on it until the end()
            // deadlock guard trips.
            break;
        }
    }
    return result;
};

/**
 * Parse a `\(...\)` or `\[...\]` sequence.
 *
 * The content is parsed in math mode; the previous parse mode is restored
 * afterwards. The group's `mathstyle` is `'textstyle'` for `\(...\)` and
 * `'displaystyle'` for `\[...\]`.
 * @return {MathAtom} group for the sequence, or null if the next token does
 * not open such a sequence or if the sequence is empty
 * @method Parser#scanModeSet
 * @private
 */
Parser.prototype.scanModeSet = function() {
    let closer = null;
    if (this.parseCommand('(')) {
        closer = ')';
    } else if (this.parseCommand('[')) {
        closer = ']';
    }
    if (closer === null) return null;

    const previousMode = this.swapParseMode('math');

    const group = new MathAtom('math', 'group');
    group.mathstyle = closer === ')' ? 'textstyle' : 'displaystyle';
    const isCloser = token =>
        token.type === 'command' && token.value === closer;
    group.body = this.scanImplicitGroup(isCloser);
    this.parseCommand(closer);

    this.swapParseMode(previousMode);

    const isEmpty = !group.body || group.body.length === 0;
    return isEmpty ? null : group;
};

/**
 * Parse a `$...$` or `$$...$$` sequence.
 *
 * The content is parsed in math mode; the previous parse mode is restored
 * afterwards. The group's `mathstyle` is `'textstyle'` for `$...$` and
 * `'displaystyle'` for `$$...$$`, and its `latexOpen`/`latexClose` record
 * the delimiter used.
 * @return {MathAtom} group for the sequence, or null if the next token does
 * not open such a sequence or if the sequence is empty
 * @method Parser#scanModeShift
 * @private
 */
Parser.prototype.scanModeShift = function() {
    const atShift = this.hasToken('$') || this.hasToken('$$');
    if (!atShift) return null;

    // The type of the opening token is also the type of the closing one
    const shift = this.get().type;
    const isInline = shift === '$';

    const group = new MathAtom('math', 'group');
    group.mathstyle = isInline ? 'textstyle' : 'displaystyle';
    group.latexOpen = isInline ? '$' : '$$';
    group.latexClose = group.latexOpen;

    const previousMode = this.swapParseMode('math');
    group.body = this.scanImplicitGroup(token => token.type === shift);
    this.parseToken(shift);
    this.swapParseMode(previousMode);

    const isEmpty = !group.body || group.body.length === 0;
    return isEmpty ? null : group;
};




/**
 * Parse a `\begin{env}...\end{env}` sequence.
 *
 * The content is collected in two forms: as a grid of cells (`array`, one
 * entry per row, each row an array of math lists) delimited by the column
 * and row separators, and as a flat math list (`body`) made of the atoms
 * left in the math list when the environment ends.
 *
 * @return {MathAtom} An atom of type `'array'` carrying `array`, `body`,
 * `rowGaps` and `env` properties, or null if the next token is not
 * `\begin`, or if nothing was collected (empty body for a non-tabular
 * environment, no row for a tabular one).
 * @method Parser#scanEnvironment
 * @private
 */
Parser.prototype.scanEnvironment = function() {
    // An environment starts with a \begin command
    if (!this.parseCommand('begin')) return null;

    // The \begin command is immediately followed by the environment
    // name, as a string argument
    const envName = this.scanArg('string');

    const env = Definitions.getEnvironmentInfo(envName);

    // If the environment has some arguments, parse them
    const args = [];
    if (env && env.params) {
        for (const param of env.params) {
            // Parse an argument
            if (param.optional) {
                // If it's not present, return the default argument value
                const arg = this.scanOptionalArg(param.type);
                // args.push(arg ? arg : param.defaultValue); @todo defaultvalue
                args.push(arg);

            } else {
                // If it's not present, scanArg returns null,
                // but push it on the list of arguments anyway.
                // The null value will be interpreted as unspecified
                // optional value by the command handler.
                args.push(this.scanArg(param.type));
            }
        }
    }

    // Some environments change the mode
    const savedMode = this.parseMode;
    const savedTabularMode = this.tabularMode;
    const savedMathList = this.swapMathList([]);
    // @todo: since calling scanImplicitGroup(), may not need to save/restore the mathlist

    // NOTE(review): `env` was null-checked above but is dereferenced without
    // a guard from here on. This is only safe if getEnvironmentInfo() always
    // returns an object, including for unknown environments — confirm.
    this.tabularMode = env.tabular;

    const array = [];       // The completed rows
    const rowGaps = [];     // For each completed row, the gap following it
    let row = [];           // The cells of the row being collected

    let done = false;
    do {
        done = this.end();
        if (!done && this.parseCommand('end')) {
            // An `\end` for another environment is consumed (with its
            // argument) and otherwise ignored.
            done = this.scanArg('string') === envName;
        }
        if (!done) {
            if (this.parseColumnSeparator()) {
                // End of a cell: the atoms accumulated so far are its content
                row.push(this.swapMathList([]));

            } else if (this.parseRowSeparator()) {
                // End of a row: close the current cell, then the row.
                row.push(this.swapMathList([]));
                // The row separator may be followed by an optional gap, as
                // a dimension in brackets, e.g. `\\[2pt]`
                let gap = 0;
                this.parseToken('space');
                if (this.parseLiteral('[')) {
                    gap = this.scanDimen();
                    this.parseToken('space');
                    this.parseLiteral(']');
                }
                rowGaps.push(gap || 0);
                array.push(row);
                row = [];
            } else {
                this.mathList = this.mathList.concat(this.scanImplicitGroup());
            }
        }
    } while (!done);

    // Close the last cell and the last row. Since a cell has just been
    // pushed, `row` is never empty here and the row is always recorded.
    row.push(this.swapMathList([]));
    if (row.length > 0) array.push(row);

    // The math list was emptied by the swap just above, so this retrieves
    // that empty list while reinstating the caller's math list.
    const newMathList = this.swapMathList(savedMathList);

    // If we're in tabular mode, we should end up with an empty mathlist
    // (`this.tabularMode` still holds the environment's setting here)
    console.assert(!this.tabularMode || newMathList.length === 0,
        'Leftover atoms in tabular mode');

    this.parseMode = savedMode;
    this.tabularMode = savedTabularMode;

    if (!env.tabular && newMathList.length === 0) return null;
    if (env.tabular && array.length === 0) return null;

    // The optional environment parser provides the additional attributes
    // passed as the last argument of the MathAtom constructor.
    const result = new MathAtom(this.parseMode, 'array', null, null,
        env.parser ? env.parser(envName, args, array) : {});
    result.array = array;
    result.body = newMathList;
    result.rowGaps = rowGaps;
    result.env = env;
    // NOTE(review): this writes `name` onto the object returned by
    // getEnvironmentInfo(). If that object is shared between calls (e.g. a
    // definitions table entry), it is mutated here — confirm this is intended.
    result.env.name = envName;

    return result;
}

/**
 * Parse a sequence terminated with a group end marker, such as 
 * `}`, `\end`, `&`, etc...
 * Returns an array of atoms or an empty array if the sequence
 * terminates right away.
 *
 * The terminating token is not consumed. The atoms are accumulated in a
 * temporary math list; the caller's math list is reinstated before
 * returning.
 *
 * If the sequence contains an infix command (e.g. `a \over b`), the result
 * is a single atom for that command, built from what precedes the command
 * and what follows it.
 * @param {function(Token):boolean} [done] A predicate indicating if a token signals the 
 * end of an implicit group. By default, a group ends on `'}'`, `'&'`,
 * `'\\'`, `'\cr'` or `'\end'`.
 * @return {MathAtom[]}
 * @method Parser#scanImplicitGroup
 * @private
 */
Parser.prototype.scanImplicitGroup = function(done) {
    // {black\color{red}red\color{green}green}black
    // An implicit group is a sequence of atoms that terminates with 
    // a `'}'`, `'&'`, `'\'`, `'\cr'` or `'\end'` or the end of the stream
    if (!done) {
        done =  token =>
         token.type === '}' || 
        (token.type === 'literal' && token.value === '&') ||
        (token.type === 'command' && (
            token.value === 'end' || 
            token.value === 'cr' || 
            token.value === '\\'
        ));
    }
    // To handle infix operators, we'll keep track of their prefix
    // (tokens coming before them)
    let infix = null;     // A token
    let prefix = null;    // A mathlist

    // Accumulate into a fresh math list; the caller's list is put back
    // when the result is extracted, below.
    const savedMathlist = this.swapMathList([]);
    while(!this.end() && !done(this.peek())) {
        if (this.hasImplicitCommand(SIZING_COMMANDS)) {
            // Implicit sizing command such as \Large, \small
            // affect the tokens following them
            // Note these commands are only appropriate in 'text' mode.
            const atom = new MathAtom(this.parseMode, 'sizing');
            // Map the name of the command (consumed here) to a size
            atom.size = {
                'tiny' : 'size1', 
                'scriptsize': 'size2', 
                'footnotesize': 'size3',
                'small' : 'size4', 
                'normalsize': 'size5',
                'large': 'size6', 
                'Large': 'size7', 
                'LARGE': 'size8', 
                'huge': 'size9',
                'Huge': 'size10'
            }[this.get().value];
            this.mathList.push(atom);

        } else if (this.hasImplicitCommand(MATHSTYLE_COMMANDS)) {
            // Implicit math style commands such as \displaystyle, \textstyle...
            // Note these commands switch to math mode and a specific size
            // \textstyle is the mathstyle used for inlinemath, not for text
            // (The parse mode is not restored by this function: the switch
            // to 'math' outlives the group unless the caller restores it.)
            this.parseMode = 'math';
            const atom = new MathAtom('math', 'mathstyle');
            atom.mathstyle = this.get().value;
            this.mathList.push(atom);

        } else if (this.hasInfixCommand() && !infix) {
            // The next token is an infix and we have not seen one yet
            // (there can be only one infix command per implicit group).
            // A subsequent infix command falls through to parseAtom().
            infix = this.get();
            // Save the math list so far and start a new one
            prefix = this.swapMathList([]);

        } else {
            this.parseAtom();
        }
    }
    

    let result;

    if (infix) {
        // What was accumulated since the infix command is its suffix
        const suffix = this.swapMathList(savedMathlist);
        // The current parseMode, this.parseMode, may no longer have the value
        // it had when we encountered the infix. However, since all infix are
        // only defined in 'math' mode, we can use the 'math' constant 
        // for the parseMode
        const info = Definitions.getInfo('\\' + infix.value, 
            'math', this.macros);
        if (info) {
            // The handler of the command, if any, receives the prefix and
            // the suffix as its arguments; what it returns is passed as the
            // last argument of the MathAtom constructor.
            result =  [new MathAtom(
                this.parseMode, info.type || 'mop', 
                info.value || infix.value, 
                info.fontFamily,
                info.handler ? 
                    info.handler('\\' + infix.value, [prefix, suffix]) :
                    null)];
        } else {
            // No definition found in 'math' mode: fall back to a bare
            // operator. The prefix and the suffix are not part of the result.
            result =  [new MathAtom(
                this.parseMode, 'mop', infix.value, '', null)];
        }
    } else {
        result = this.swapMathList(savedMathlist);
    }
    return result;
}

/**
 * Parse a group enclosed in a pair of braces: `{...}`.
 *
 * Return either a group MathAtom or null if not a group (the next token is
 * not `'{'`).
 *
 * Return a group MathAtom with an empty body if an empty
 * group (i.e. `{}`). The closing brace is consumed when present.
 * @return {MathAtom}
 * @method Parser#scanGroup
 * @private
 */
Parser.prototype.scanGroup = function() {
    const opened = this.parseToken('{');
    if (!opened) return null;

    const group = new MathAtom(this.parseMode, 'group');
    const isClosingBrace = token => token.type === '}';
    group.body = this.scanImplicitGroup(isClosingBrace);
    this.parseToken('}');

    return group;
};


/**
 * Parse a parenthesized sequence `(...)` as if it had been written
 * `\mleft(...\mright)`.
 *
 * Nested parentheses are tracked so that the sequence ends at the matching
 * closing parenthesis, which is consumed. If the end of the stream is
 * reached first, the right delimiter is set to `'?'`.
 *
 * Note that a leading space token is consumed even when there is no fence.
 * @return {MathAtom} An atom of type `'leftright'`, or null if the next
 * token (after an optional space) is not a literal `'('`.
 * @method Parser#scanSmartFence
 * @private
 */
Parser.prototype.scanSmartFence = function() {
    this.parseToken('space');
    if (!this.parseLiteral('(')) return null;

    // We've found an open paren... Convert to a `\mleft...\mright`
    const fence = new MathAtom(this.parseMode, 'leftright');
    fence.leftDelim = '(';
    fence.inner = false;   // It's a `\mleft`, not a `\left`

    const outerMathList = this.swapMathList([]);
    let depth = 1;
    while (!this.end() && depth !== 0) {
        if (this.hasLiteral('(')) {
            depth += 1;
        } else if (this.hasLiteral(')')) {
            depth -= 1;
        }
        // The matching close paren is not part of the body: leave it for
        // the parseLiteral() below
        if (depth !== 0) this.parseAtom();
    }
    const isBalanced = depth === 0;
    if (isBalanced) this.parseLiteral(')');

    fence.rightDelim = isBalanced ? ')' : '?';
    fence.body = this.swapMathList(outerMathList);

    return fence;
};


/**
 * Scan a delimiter, e.g. '(', '|', '\vert', '\ulcorner'
 *
 * An optional leading space is skipped, then one token is consumed, whether
 * or not it turns out to be a valid delimiter. The token is a delimiter if
 * its definition (looked up in 'math' mode) is of type `'mopen'` or
 * `'mclose'`, or if it is one of a few symbols explicitly accepted as
 * delimiters.
 *
 * @return {string} The delimiter (as a character or command) or null
 * @memberof Parser
 * @method Parser#scanDelim
 * @private
 */
Parser.prototype.scanDelim = function() {
    this.parseToken('space');

    const token = this.get();
    if (!token) return null;

    let delim;
    switch (token.type) {
        case 'command':
            delim = '\\' + token.value;
            break;
        case 'literal':
            delim = token.value;
            break;
        default:
            // Neither a command nor a literal
            delim = '.';
    }

    const info = Definitions.getInfo(delim, 'math', this.macros);
    if (!info) return null;

    const isFence = info.type === 'mopen' || info.type === 'mclose';
    if (isFence) return delim;

    // Some symbols are not of type mopen/mclose, but are still
    // valid delimiters...
    // '?' is a special delimiter used as a 'placeholder'
    // (when the closing delimiter is displayed greyed out)
    const otherDelimiters = [
        '?', '|', '<', '>', '\\vert', '\\Vert', '\\|', '\\surd',
        '\\uparrow', '\\downarrow', '\\Uparrow', '\\Downarrow',
        '\\updownarrow', '\\Updownarrow',
        '\\mid', '\\mvert', '\\mVert',
    ];
    return otherDelimiters.includes(delim) ? delim : null;
};

/**
 * Parse a `\left...\right` (or `\mleft...\mright`) sequence.
 *
 * Note: the `\middle` command can occur multiple times inside a
 * `\left...\right` sequence, and is handled separately.
 *
 * A `\right` or `\mright` found without a matching opening command produces
 * an atom with only a `rightDelim` (defaulting to `'.'`).
 *
 * Return either an atom of type `'leftright'` or null. In the atom,
 * `inner` is true for `\left...\right` and false for `\mleft...\mright`.
 * @return {MathAtom}
 * @method Parser#scanLeftRight
 * @private
 */
Parser.prototype.scanLeftRight = function() {
    if (this.parseCommand('right') || this.parseCommand('mright')) {
        // We have an unbalanced left/right (there's a \right, but no \left)
        const orphan = new MathAtom(this.parseMode, 'leftright');
        orphan.rightDelim = this.scanDelim() || '.';
        return orphan;
    }

    // Which command closes the sequence depends on the one that opens it
    let closer;
    if (this.parseCommand('left')) {
        closer = 'right';
    } else if (this.parseCommand('mleft')) {
        closer = 'mright';
    } else {
        return null;
    }

    const leftDelim = this.scanDelim() || '.';

    const outerMathList = this.swapMathList([]);
    while (!this.end() && !this.parseCommand(closer)) {
        this.parseAtom();
    }

    // If we've reached the end and there was no `\right` or
    // there isn't a valid delimiter after `\right`, we'll
    // consider the `\right` missing: scanDelim() then yields a null
    // `rightDelim`
    const rightDelim = this.scanDelim();

    const fence = new MathAtom(this.parseMode, 'leftright');
    fence.leftDelim = leftDelim;
    fence.rightDelim = rightDelim;
    fence.inner = closer === 'right';
    fence.body = this.swapMathList(outerMathList);

    return fence;
};

/**
 * Parse a run of subscripts/superscripts: `^`, `_` and `'` (prime).
 *
 * Modify the last atom accordingly: the scanned arguments are appended to
 * its `superscript` or `subscript` list.
 *
 * Nothing is parsed outside of 'math' mode.
 * @return {boolean} True if a subscript or superscript was attached.
 * @method Parser#parseSupSub
 * @private
 */
Parser.prototype.parseSupSub = function() {
    // No sup/sub in text or command mode.
    if (this.parseMode !== 'math') return false;

    // Apply the subscript/superscript to the last render atom.
    // If none is present (beginning of the mathlist, i.e. `{^2}`,
    // an empty atom will be created, equivalent to `{{}^2}`
    const slotForMarker = { '^': 'superscript', '_': 'subscript' };
    let attached = false;

    for (;;) {
        // Consume a `^` or a `_`, whichever comes next
        const marker = ['^', '_'].find(type => this.parseToken(type));
        if (marker) {
            const slot = slotForMarker[marker];
            const arg = this.scanArg();
            if (arg) {
                const atom = this.lastMathAtom();
                atom[slot] = (atom[slot] || []).concat(arg);
                attached = true;
            }

        } else if (this.parseLiteral("'")) {
            // A single quote (prime) is actually equivalent to a
            // '^{\prime}'
            const atom = this.lastMathAtom();
            atom.superscript = atom.superscript || [];
            atom.superscript.push(
                new MathAtom(atom.parseMode, 'mord', '\u2032', 'main')
            );
            attached = true;

        } else {
            break;
        }
    }

    return attached;
};

/**
 * Parse a `\limits` or `\nolimits` command.
 *
 * This will change the placement of limits to be either above or below
 * (if `\limits`) or in the superscript/subscript position (if `\nolimits`).
 *
 * This overrides the calculation made for the placement, which is usually
 * dependent on the displaystyle (`inlinemath` prefers `\nolimits`, while
 * `displaymath` prefers `\limits`).
 * @return {boolean} True if one of the two commands was consumed.
 * @method Parser#parseLimits
 * @private
 */
Parser.prototype.parseLimits = function() {
    // Note: technically, \limits and \nolimits are only applicable
    // after an operator. However, we apply them in all cases. They
    // will simply be ignored when not applicable (i.e. on a literal)
    // which is actually consistent with TeX.
    for (const placement of ['limits', 'nolimits']) {
        if (this.parseCommand(placement)) {
            const target = this.lastMathAtom();
            target.limits = placement;
            // Record that the limits was set through an explicit command
            // so we can generate the appropriate LaTeX later
            target.explicitLimits = true;
            return true;
        }
    }

    return false;
};

/**
 * Parse an optional argument, i.e. an argument enclosed in square brackets.
 * 
 * A leading space token is skipped. If the next token is not a `[` literal
 * nothing else is consumed and `null` is returned.
 * 
 * @param {string} [parseMode] Temporarily overrides the parser parsemode 
 * while the argument is scanned. When omitted or `'auto'`, the current 
 * parsemode is used. The modes handled here are `'string'`, `'number'`, 
 * `'dimen'`, `'skip'`, `'colspec'`, `'color'`, `'bbox'` and `'math'`.
 * @return {*} `null` if there is no optional argument. Otherwise the value 
 * produced by the scanner for `parseMode` or, when that value is falsy (which
 * is always the case in `'math'` mode), the list of atoms accumulated while 
 * scanning.
 * @method Parser#scanOptionalArg
 * @private
 */
Parser.prototype.scanOptionalArg = function(parseMode) {
    parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
    this.parseToken('space');

    if (!this.parseLiteral('[')) return null;

    const savedParseMode = this.parseMode;
    this.parseMode = parseMode;
    // NOTE(review): presumably installs a fresh, empty math list and returns
    // the previous one -- confirm against `swapMathList()`.
    const savedMathlist = this.swapMathList();
    let result;
    while (!this.end() && !this.parseLiteral(']')) {
        // Remember where this iteration started, to detect a scanner that 
        // stopped on a token it cannot consume.
        const startIndex = this.index;

        if (parseMode === 'string') {
            result = this.scanString();
        } else if (parseMode === 'number') {
            result = this.scanNumber();
        } else if (parseMode === 'dimen') {
            result = this.scanDimen();
        } else if (parseMode === 'skip') {
            result = this.scanSkip();
        } else if (parseMode === 'colspec') {
            result = this.scanColspec();
        } else if (parseMode === 'color') {
            result = this.scanColor() || '#ffffff';
        } else if (parseMode === 'bbox') {
            // The \bbox command takes a very particular argument:
            // a comma delimited list of up to three arguments:
            // a color, a dimension and a string.
            // Split the string by comma delimited sub-strings, ignoring commas
            // that may be inside (). For example "x, rgb(a, b, c)" would return 
            // ['x', 'rgb(a, b, c)']
            const list = this.scanString().toLowerCase().trim().split(/,(?![^(]*\)(?:(?:[^(]*\)){2})*[^"]*$)/);
            for (const elem of list) {
                const color = Color.stringToColor(elem);
                if (color) {
                    result = result || {};
                    result.backgroundcolor = color;
                    continue;
                }
                // A number followed by a two-letter unit: the padding
                const padding = elem.match(/^\s*([0-9.]+)\s*([a-z][a-z])/);
                if (padding) {
                    result = result || {};
                    result.padding = FontMetrics.toEm(padding[1], padding[2]);
                    continue;
                }
                // "border: ..." is kept verbatim
                const border = elem.match(/^\s*border\s*:\s*(.*)/);
                if (border) {
                    result = result || {};
                    result.border = border[1];
                }
            }
        } else {
            console.assert(parseMode === 'math', 
                'Unexpected parse mode: "' + parseMode + '"');
            this.mathList = this.mathList.concat(
                this.scanImplicitGroup(token => 
                    token.type === 'literal' && token.value === ']'));
        }

        // Guarantee forward progress: if nothing was consumed (e.g. a stray
        // `}` in front of the closing `]`), the next iteration would be in 
        // exactly the same state. Skip the offending token instead of 
        // looping on it.
        if (this.index === startIndex) this.get();
    }
    this.parseMode = savedParseMode;
    const mathList = this.swapMathList(savedMathlist);
    return result ? result : mathList;
}


/**
 * Parse a math field, an argument to a function.
 * 
 * An argument can either be a single atom or 
 * a sequence of atoms enclosed in braces.
 * 
 * @param {string} [parseMode] Temporarily overrides the parser parsemode. For 
 * example: `'dimen'`, `'color'`, `'text'`, etc... When omitted or `'auto'`, 
 * the current parsemode is used.
 * @return {*} In `'math'` and `'text'` mode, an array of atoms, or `null` if 
 * an unbraced argument could not be scanned. In the other modes, the value 
 * returned by the scanner for that mode or, if that value is falsy, the 
 * (normally empty) list of atoms accumulated while scanning. For a `#` 
 * parameter token, the substituted argument, a placeholder, or `null`.
 * @method Parser#scanArg
 * @private
 */
Parser.prototype.scanArg = function(parseMode) {
    parseMode = (!parseMode || parseMode === 'auto') ? this.parseMode : parseMode;
    this.parseFiller();

    let result;

    // An argument (which is called a 'math field' in TeX)
    // could be a single character or symbol, as in `\frac12`
    // Note that ``\frac\sqrt{-1}\alpha\beta`` is equivalent to
    // ``\frac{\sqrt}{-1}\alpha\beta``
    if (!this.parseToken('{')) {
        if (parseMode === 'delim') {
            return this.scanDelim() || '.';
        } else if (parseMode === 'math' || parseMode === 'text') {
            // Parse a single atom. This applies to text as well as math:
            // an unbraced argument is a single token, and must not swallow 
            // everything up to the next, unrelated, `}`.
            const savedParseMode = this.parseMode;
            this.parseMode = parseMode;

            const atom = this.scanToken();

            this.parseMode = savedParseMode;

            if (Array.isArray(atom)) return atom;
            return atom ? [atom] : null;
        }
    }

    // If this is a param token, substitute it with the
    // (optional) argument passed to the parser
    if (this.hasToken('#')) {
        const paramToken = this.get();
        this.skipUntilToken('}');
        if (paramToken.value === '?') {
            return this.placeholder();
        }
        if (this.args) {
            // No value for this parameter, but a placeholder argument was 
            // provided: use a placeholder.
            if (this.args[paramToken.value] === undefined && 
                this.args['?'] !== undefined) {
                return this.placeholder();
            }
            return this.args[paramToken.value] || null;
        }
        return null;
    }

    const savedParseMode = this.parseMode;
    this.parseMode = parseMode;
    const savedMathList = this.swapMathList([]);

    
    if (parseMode === 'string') {
        result = this.scanString();
        this.skipUntilToken('}');

    } else if (parseMode === 'number') {
        result = this.scanNumber();
        this.skipUntilToken('}');

    } else if (parseMode === 'dimen') {
        result = this.scanDimen();
        this.skipUntilToken('}');

    } else if (parseMode === 'skip') {
        result = this.scanSkip();
        this.skipUntilToken('}');

    } else if (parseMode === 'colspec') {
        result = this.scanColspec();
        this.skipUntilToken('}');

    } else if (parseMode === 'color') {
        result = this.scanColor() || '#ffffff';
        this.skipUntilToken('}');

    } else if (parseMode === 'delim') {
        result = this.scanDelim() || '.';
        this.skipUntilToken('}');

    } else {
        console.assert(parseMode === 'math' || parseMode === 'text', 
            'Unexpected parse mode: "' + parseMode + '"');

        // Accumulate implicit groups until the closing brace (or the end 
        // of the tokens) is reached
        do {
            this.mathList = this.mathList.concat(this.scanImplicitGroup());
        } while(!this.parseToken('}') && !this.end());
    }

    this.parseMode = savedParseMode;
    const mathList = this.swapMathList(savedMathList);
    return result ? result : mathList;
}


/**
 * Consume the next token and turn it into an atom.
 * 
 * For a command token, the arguments of the command are consumed as well.
 * 
 * @return {?(MathAtom|MathAtom[])} The atom for the token; an array of 
 * two elements when the atom is followed by a smart fence; or `null` if there 
 * are no more tokens or the token did not produce an atom (e.g. a space in 
 * math mode, or a command with no definition).
 * @method Parser#scanToken
 * @private
 */
Parser.prototype.scanToken = function() {
    const token = this.get();
    if (!token) return null;

    let result = null;

    if (token.type === 'space') {
        // Spaces only produce an atom in text mode
        if (this.parseMode === 'text') {
            result = new MathAtom('text', 'textord', ' ');
        }
    } else if (token.type === 'esc') {
        // RENDER ESCAPE SEQUENCE INDICATOR
        result = new MathAtom(this.parseMode, 'esc', 'ESC');

    } else if (token.type === 'backslash') {
        // RENDER BACKSLASH INDICATOR
        result = new MathAtom(this.parseMode, 'command', '\\');

    } else if (token.type === 'commandliteral') {
        // RENDER ESCAPE SEQUENCE
        // Note: a leading 'backslash' token is handled by the previous 
        // branch, so only 'commandliteral' can start a run here. Backslash 
        // tokens can still be part of the rest of the run.
        let body = token.value;
        while (this.hasToken('commandliteral') || 
                this.hasToken('backslash')) {
            body += this.get().value;
        }

        result = new MathAtom(this.parseMode, 'command', body);

    } else if (token.type === 'placeholder') {
        // RENDER PLACEHOLDER
        result = new MathAtom(this.parseMode, 'placeholder', token.value);

    } else if (token.type === 'command') {
        // RENDER COMMAND
        if (token.value === 'placeholder') {
            result = new MathAtom(this.parseMode, 'placeholder', this.scanArg('string'));
            
        } else if (token.value === 'char') {
            // \char has a special syntax and requires a non-braced integer 
            // argument
            let codepoint = Math.floor(this.scanNumber(true));
            if (!isFinite(codepoint) || codepoint < 0 || codepoint > 0x10FFFF) {
                codepoint = 0x2753; // BLACK QUESTION MARK
            }
            result = new MathAtom(this.parseMode,
                this.parseMode === 'math' ? 'mord' : 'textord', 
                String.fromCodePoint(codepoint), 
                'main');
            // Serialize as six uppercase hexadecimal digits
            result.latex = '{\\char"' + 
                ('000000' + codepoint.toString(16)).toUpperCase().slice(-6) + '}';

        } else if (token.value === 'hskip' || token.value === 'kern') {
            // \hskip and \kern have a special syntax and requires a non-braced 
            // 'skip' argument
            const width = this.scanSkip();
            if (!isNaN(width)) {
                result = new MathAtom(this.parseMode, 'spacing');
                result.width = width;
            }

        } else {
            // Macros take precedence over the command definitions
            result = this.scanMacro(token.value);

            if (!result) {
                const info = Definitions.getInfo('\\' + token.value, 
                    this.parseMode, this.macros);
                const args = [];

                // Parse the arguments
                if (info && info.params) {
                    for (const param of info.params) {
                        // Parse an argument
                        if (param.optional) {
                            // TODO: if the optional argument is not present, 
                            // use `param.defaultValue` instead of pushing 
                            // the `null` returned by scanOptionalArg.
                            const arg = this.scanOptionalArg(param.type);
                            args.push(arg);

                        } else {
                            // If it's not present, scanArg returns null.
                            // Add a placeholder instead.
                            const arg = this.scanArg(param.type);
                            if (arg && arg.length === 1 && 
                                arg[0].type === 'placeholder' && param.placeholder) {
                                arg[0].value = param.placeholder;
                            }
                            if (arg) {
                                args.push(arg);
                            } else if (param.placeholder) {
                                args.push([new MathAtom(this.parseMode, 'placeholder', param.placeholder)]);
                            } else {
                                args.push(this.placeholder());
                            }
                        }
                    }
                }

                if (info && !info.infix) {
                    // Infix commands should be handled in scanImplicitGroup
                    // If we find an infix command here, it's a syntax error
                    // (second infix command in an implicit group) and should be ignored.

                    // Create the MathAtom
                    // If a handler is present, invoke it with the arguments, 
                    // and pass the result to be appended by the constructor.
                    if (info.handler) {
                        result =  new MathAtom(
                                this.parseMode, info.type, 
                                null, 
                                info.fontFamily,
                                info.handler('\\' + token.value, args));
                    } else {
                        result =  new MathAtom(
                                this.parseMode, info.type || 'mop', 
                                info.value || token.value, 
                                info.fontFamily);
                    }
                    result.latex = '\\' + token.value + ' ';
                    if (result.isFunction && this.smartFence) {
                        // The atom was a function that may be followed by 
                        // an argument, like `\sin(`
                        const smartFence = this.scanSmartFence();
                        if (smartFence) {
                            result = [result, smartFence];
                        }
                    }
                }
            }
        }

    } else if (token.type === 'literal') {
        const info = Definitions.getInfo(token.value, this.parseMode, this.macros);
        if (info) {
            result = new MathAtom(this.parseMode,  info.type, 
                info.value || token.value, info.fontFamily);
            result.isFunction = info.isFunction;
        } else {
            // Unknown literal: no warning is issued, it is rendered as an 
            // ordinary symbol
            result = new MathAtom(this.parseMode, 
                this.parseMode === 'math' ? 'mord' : 'textord', 
                token.value, 'main');
        }
        result.latex = Definitions.matchCodepoint(token.value);

        if (info && info.isFunction && this.smartFence) {
            // The atom was a function that may be followed by 
            // an argument, like `f(`.
            const smartFence = this.scanSmartFence();
            if (smartFence) {
                result = [result, smartFence];
            }
        }

    } else if (token.type === '#') {
        // Parameter token in an implicit group (not as a parameter)
        if (token.value === '?') {
            // '#?' indicates that a placeholder should be used
            result = this.placeholder();
        } else if (this.args) {
            result = this.args[token.value] || null;
            if (Array.isArray(result) && result.length === 1) {
                result = result[0];
            } else if (Array.isArray(result)) {
                // Wrap multiple atoms in a group so a single atom is returned
                const group = new MathAtom(this.parseMode, 'group');
                group.body = result;
                result = group;
            } else {
                // If there is no argument value specified, use a placeholder
                result = this.placeholder();
            }
        }
    } else {
        console.warn('Unexpected token type "' + token.type + '"');
    }
    return result;
}

/**
 * Attempt to expand a macro.
 * 
 * If `macro` is the name of a macro, scan its arguments, expand its 
 * definition and return the result as a single group atom. Otherwise, it 
 * wasn't a macro and nothing is consumed.
 * 
 * A macro is either a string (its definition, in which case the number of 
 * arguments is inferred from the highest unescaped `#1`...`#9` it contains),
 * or an object with a `def` and an optional `args` (argument count) key.
 * 
 * @param {string} macro The name of the macro, without the leading backslash.
 * @return {?MathAtom} A group atom, or `null` if there is no such macro.
 * @method Parser#scanMacro
 * @private
 */
Parser.prototype.scanMacro = function(macro) {
    // Only consider the entries of the dictionary itself: a plain lookup 
    // would also find the members inherited from `Object.prototype`, and 
    // `\constructor` or `\toString` would be mistaken for macros.
    if (!this.macros || 
        !Object.prototype.hasOwnProperty.call(this.macros, macro) || 
        !this.macros[macro]) {
        return null;
    }

    const definition = this.macros[macro];
    const args = {};
    let def;
    let argCount = 0;
    if (typeof definition === 'string') {
        def = definition;
        // Let's see if there are arguments in the definition: the argument
        // count is the highest `#n` not preceded by a backslash.
        for (let i = 1; i <= 9; i++) {
            if (new RegExp('(^|[^\\\\])#' + i).test(def)) argCount = i;
        }
    } else {
        def = definition.def;
        argCount = (definition.args || 0);
    }
    for (let i = 1; i <= argCount; i++) {
        args[i] = this.scanArg();
    }

    // Carry forward the placeholder argument, if any.
    if (this.args && typeof this.args['?'] === 'string') {
        args['?'] = this.args['?'];
    }

    // Group the result of the macro expansion, and set the 
    // captureSelection attribute so that it is handled as an unbreakable
    // unit
    const atom = new MathAtom(this.parseMode, 'group', 
        parseTokens(Lexer.tokenize(def), this.parseMode, args, this.macros));
    atom.captureSelection = true;
    atom.latex = '\\' + macro;

    // Serialize the arguments, each one wrapped in braces
    let argString = '';
    for (let i = 1; i <= argCount; i++) {
        argString += '{';
        if (Array.isArray(args[i])) {
            for (const arg of args[i]) {
                // Some atoms have no `latex` property: don't let them 
                // contribute the text "undefined"
                argString += arg.latex || '';
            }
        }
        argString += '}';
    }

    // Without arguments, a space terminates the command name
    atom.latex += argString ? argString : ' ';

    return atom;
}

/**
 * Parse the next construct (environment, mode shift, mode set, group, 
 * `\left...\right`, superscript/subscript, limits, or plain token) and 
 * append the atom(s) it yields, if any, to the parser's current mathList.
 * 
 * @return {boolean} True if a superscript/subscript or limits command was 
 * consumed, or if the scanners did not return `null`.
 * @method Parser#parseAtom
 * @private
 */
Parser.prototype.parseAtom = function() {
    // The structural constructs are tried first, in this order
    let parsed = this.scanEnvironment() ||
        this.scanModeShift() ||
        this.scanModeSet() ||
        this.scanGroup() || 
        this.scanLeftRight();

    if (!parsed) {
        // These modify the last atom rather than produce a new one
        if (this.parseSupSub() || this.parseLimits()) {
            return true;
        }
        parsed = this.scanToken();
    }

    // Some tokens are skipped and yield no atom at all, for example a ' ' 
    // in math mode: in that case there is nothing to append.
    if (Array.isArray(parsed)) {
        this.mathList = this.mathList.concat(parsed);
    } else if (parsed) {
        this.mathList.push(parsed);
    }
    
    return parsed !== null;
}


/**
 * Given an array of tokens returned by the lexer, return a corresponding 
 * math list (array of atoms).
 * @param {Array.<Token>} tokens 
 * @param {string} [parseMode='math'] Other possible values: `'text'`, 
 * `'color'`, etc...
 * @param {Object} [args] - If there are any parameter tokens, e.g. 
 * `#0`, `#1`, etc... they will be replaced by the value provided by `args`.
 * @param {Object} [macros] Dictionary defining macros
 * @param {boolean} [smartFence=false] If true, promote plain fences, e.g. `(`,
 * as `\left...\right` or `\mleft...\mright`
 * @return {Array.<MathAtom>}
 * @private
 */
function parseTokens(tokens, parseMode, args, macros, smartFence) {
    const parser = new Parser(tokens, args, macros);
    parser.parseMode = parseMode || 'math';
    if (smartFence) parser.smartFence = true;

    let mathlist = [];
    while (!parser.end()) {
        const startIndex = parser.index;
        mathlist = mathlist.concat(parser.scanImplicitGroup());

        // Guarantee forward progress: if the group scanner stopped on a 
        // token it does not consume (e.g. an unbalanced `}`), scanning again
        // from the same position would stop there again. Skip that token 
        // and keep going.
        if (parser.index === startIndex) parser.get();
    }
    return mathlist;
}

// Export the public interface for this module
return { 
    Parser: Parser,
    parseTokens: parseTokens
}


})