import {
  FX_PREFIX,
  NEWLINE,
  NUMBER,
  OPERATOR,
  REF_NAMED,
  UNKNOWN,
  WHITESPACE,
  FUNCTION,
  OPERATOR_TRIM,
  REF_RANGE,
  REF_BEAM
} from './constants.ts';
import { mergeRefTokens } from './mergeRefTokens.ts';
import { lexers, type PartLexer } from './lexers/sets.ts';
import type { Token } from './types.ts';
import { isRCTokenValue } from './isRCTokenValue.ts';

const reLetLambda = /^l(?:ambda|et)$/i;
const isType = (t: Token, type: string) => t && t.type === type;
const isTextTokenType = (tokenType: string) => tokenType === REF_NAMED || tokenType === FUNCTION;

const causesBinaryMinus = (token: Token) => {
  return !isType(token, OPERATOR) || (
    token.value === '%' ||
    token.value === '}' ||
    token.value === ')' ||
    token.value === '#'
  );
};

function fixRCNames (tokens: Token[], r1c1Mode?: boolean): Token[] {
  let withinCall = 0;
  let parenDepth = 0;
  let lastToken: Token;
  for (const token of tokens) {
    const tokenType = token.type;
    if (tokenType === OPERATOR) {
      if (token.value === '(') {
        parenDepth++;
        if (lastToken.type === FUNCTION) {
          if (reLetLambda.test(lastToken.value)) {
            withinCall = parenDepth;
          }
        }
      }
      else if (token.value === ')') {
        parenDepth--;
        if (parenDepth < withinCall) {
          withinCall = 0;
        }
      }
    }
    else if (withinCall && tokenType === UNKNOWN && isRCTokenValue(token.value)) {
      token.type = REF_NAMED;
    }
    else if (withinCall && r1c1Mode && tokenType === REF_BEAM && isRCTokenValue(token.value)) {
      token.type = REF_NAMED;
    }
    lastToken = token;
  }
  return tokens;
}

type OptsGetTokens = {
  withLocation?: boolean,
  mergeRefs?: boolean,
  negativeNumbers?: boolean
  allowTernary?: boolean
  r1c1?: boolean
  xlsx?: boolean
};

export function getTokens (fx: string, tokenHandlers: PartLexer[], options: OptsGetTokens = {}) {
  const {
    withLocation = false,
    mergeRefs = true,
    negativeNumbers = true
  } = options;
  const opts = {
    withLocation: withLocation,
    mergeRefs: mergeRefs,
    allowTernary: options.allowTernary ?? false,
    negativeNumbers: negativeNumbers,
    r1c1: options.r1c1 ?? false,
    xlsx: options.xlsx ?? false
  };

  const tokens = [];
  let pos = 0;
  let letOrLambda = 0;
  let unknownRC = 0;
  const trimOps = [];

  let tail0: Token; // last non-whitespace token
  let tail1: Token; // penultimate non-whitespace token
  let lastToken: Token; // last token
  const pushToken = (token: Token) => {
    let tokenType = token.type;
    const isCurrUnknown = tokenType === UNKNOWN;
    const isLastUnknown = lastToken && lastToken.type === UNKNOWN;
    if (lastToken && (
      (isCurrUnknown && isLastUnknown) ||
      (isCurrUnknown && isTextTokenType(lastToken.type)) ||
      (isLastUnknown && isTextTokenType(tokenType))
    )) {
      // UNKNOWN tokens "contaminate" sibling text tokens
      lastToken.value += token.value;
      lastToken.type = UNKNOWN;
      if (withLocation) {
        lastToken.loc[1] = token.loc[1];
      }
    }
    else {
      if (tokenType === OPERATOR_TRIM) {
        trimOps.push(tokens.length);
        tokenType = UNKNOWN;
        token.type = UNKNOWN;
      }
      // push token as normally
      tokens[tokens.length] = token;
      lastToken = token;
      if (tokenType !== WHITESPACE && tokenType !== NEWLINE) {
        tail1 = tail0;
        tail0 = token;
      }
    }
  };

  if (fx.startsWith('=')) {
    const token: Token = { type: FX_PREFIX, value: '=' };
    if (withLocation) {
      token.loc = [ 0, 1 ];
    }
    pos++;
    pushToken(token);
  }

  const numHandlers = tokenHandlers.length;
  while (pos < fx.length) {
    const startPos = pos;
    let token;
    for (let i = 0; i < numHandlers; i++) {
      token = tokenHandlers[i](fx, pos, opts);
      if (token) {
        pos += token.value.length;
        break;
      }
    }

    if (!token) {
      token = {
        type: UNKNOWN,
        value: fx[pos]
      };
      pos++;
    }
    if (withLocation) {
      token.loc = [ startPos, pos ];
    }

    // make a note if we found a let/lambda call
    if (lastToken && token.value === '(' && lastToken.type === FUNCTION) {
      if (reLetLambda.test(lastToken.value)) {
        letOrLambda++;
      }
    }
    // Make a note if we found a R or C unknown or REF_BEAM token in R1C1 mode.
    // It seemse unlikely that anyone does `F2 = LET(c,1,c+F:F)` as this is a
    // circular reference (and not a very useful one), so we're assuming that
    // all "c" or "r" tokens found within the LET are names.
    if (token.value.length === 1 && (token.type === UNKNOWN || (opts.r1c1 && token.type === REF_BEAM))) {
      unknownRC += isRCTokenValue(token.value) ? 1 : 0;
    }

    if (negativeNumbers && token.type === NUMBER) {
      const last1 = lastToken;
      // do we have a number preceded by a minus?
      if (last1?.type === OPERATOR && last1.value === '-') {
        // missing tail1 means we are at the start of the stream
        if (
          !tail1 ||
          tail1.type === FX_PREFIX ||
          !causesBinaryMinus(tail1)
        ) {
          const minus = tokens.pop();
          token.value = '-' + token.value;
          if (token.loc) {
            // ensure offsets are up to date
            token.loc[0] = minus.loc[0];
          }
          // next step tries to counter the screwing around with the tailing
          // it should be correct again once we pushToken()
          tail0 = tail1;
          lastToken = tokens[tokens.length - 1];
        }
      }
    }

    pushToken(token);
  }

  // if we encountered both a LAMBDA/LET call, and unknown 'r' or 'c' tokens
  // we'll turn the unknown tokens into names within the call.
  if (unknownRC && letOrLambda) {
    fixRCNames(tokens, opts.r1c1);
  }

  // Any OPERATOR_TRIM tokens have been indexed already, they now need to be
  // either turned into OPERATORs or UNKNOWNs. Trim operators are only allowed
  // between two REF_RANGE tokens as they are not valid in expressions as full
  // operators.
  for (const index of trimOps) {
    const before = tokens[index - 1];
    const after = tokens[index + 1];
    tokens[index].type = (before?.type === REF_RANGE && after?.type === REF_RANGE)
      ? OPERATOR
      : UNKNOWN;
  }

  if (mergeRefs) {
    return mergeRefTokens(tokens);
  }

  return tokens;
}

/**
 * Options for {@link tokenize}.
 */
export type OptsTokenize = {
  /**
   * Nodes will include source position offsets to the tokens: `{ loc: [ start, end ] }`
   * @defaultValue true
   */
  withLocation?: boolean,
  /**
   * Should ranges be returned as whole references (`Sheet1!A1:B2`) or as separate tokens for each
   * part: (`Sheet1`,`!`,`A1`,`:`,`B2`). This is the same as calling [`mergeRefTokens`](#mergeRefTokens)
   * @defaultValue true
   */
  mergeRefs?: boolean,
  /**
   * Merges unary minuses with their immediately following number tokens (`-`,`1`) => `-1`
   * (alternatively these will be unary operations in the tree).
   * @defaultValue true
   */
  negativeNumbers?: boolean
  /**
   * Enables the recognition of ternary ranges in the style of `A1:A` or `A1:1`. These are supported
   * by Google Sheets but not Excel. See: [References.md](./References.md).
   * @defaultValue false
   */
  allowTernary?: boolean
  /**
   * Ranges are expected to be in the R1C1 style format rather than the more popular A1 style.
   * @defaultValue false
   */
  r1c1?: boolean
};

/**
 * Breaks a string formula into a list of tokens.
 *
 * The returned output will be an array of objects representing the tokens:
 *
 * ```js
 * [
 *   { type: FX_PREFIX, value: '=' },
 *   { type: FUNCTION, value: 'SUM' },
 *   { type: OPERATOR, value: '(' },
 *   { type: REF_RANGE, value: 'A1:B2' },
 *   { type: OPERATOR, value: ')' }
 * ]
 * ```
 *
 * A collection of token types may be found as an object as the {@link tokenTypes}
 * export on the package.
 *
 * _Warning:_ To support syntax highlighting as you type, `STRING` tokens are allowed to be
 * "unterminated". For example, the incomplete formula `="Hello world` would be
 * tokenized as:
 *
 * ```js
 * [
 *   { type: FX_PREFIX, value: '=' },
 *   { type: STRING, value: '"Hello world', unterminated: true },
 * ]
 * ```
 *
 * Parsers will need to handle this.
 *
 * @see {@link OptsTokenize}
 * @see {@link tokenTypes}
 * @param formula An Excel formula string (an Excel expression).
 * @param [options]  Options
 * @returns An array of Tokens
 */
export function tokenize (
  formula: string,
  options: OptsTokenize = {}
): Token[] {
  return getTokens(formula, lexers, options);
}

/**
 * Breaks a string formula into a list of tokens.
 *
 * The returned output will be an array of objects representing the tokens:
 *
 * ```js
 * [
 *   { type: FX_PREFIX, value: '=' },
 *   { type: FUNCTION, value: 'SUM' },
 *   { type: OPERATOR, value: '(' },
 *   { type: REF_RANGE, value: 'A1:B2' },
 *   { type: OPERATOR, value: ')' }
 * ]
 * ```
 *
 * A collection of token types may be found as an object as the {@link tokenTypes}
 * export on the package.
 *
 * _Warning:_ To support syntax highlighting as you type, `STRING` tokens are allowed to be
 * "unterminated". For example, the incomplete formula `="Hello world` would be
 * tokenized as:
 *
 * ```js
 * [
 *   { type: FX_PREFIX, value: '=' },
 *   { type: STRING, value: '"Hello world', unterminated: true },
 * ]
 * ```
 *
 * @see {@link OptsTokenize}
 * @see {@link tokenTypes}
 * @param formula An Excel formula string (an Excel expression).
 * @param [options]  Options
 * @returns An array of Tokens
 */
export function tokenizeXlsx (
  formula: string,
  options: OptsTokenize = {}
): Token[] {
  const opts = {
    withLocation: options.withLocation ?? false,
    mergeRefs: options.mergeRefs ?? true,
    allowTernary: options.allowTernary ?? false,
    negativeNumbers: options.negativeNumbers ?? true,
    r1c1: options.r1c1 ?? false,
    xlsx: true
  };
  return getTokens(formula, lexers, opts);
}
