// Copyright 2016 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

import * as Platform from '../../core/platform/platform.js';

import {CSSFormatter} from './CSSFormatter.js';
import type {FormattedContentBuilder} from './FormattedContentBuilder.js';
import {AbortTokenization, createTokenizer} from './FormatterWorker.js';
import {JavaScriptFormatter} from './JavaScriptFormatter.js';
import {JSONFormatter} from './JSONFormatter.js';

/**
 * Pretty-prints HTML by walking the element tree built by `HTMLModel` and
 * feeding the model's tokens, in order, into a shared builder. The bodies of
 * `<style>` and `<script>` elements are handed to the CSS, JavaScript and
 * JSON formatters, which write into the same builder.
 */
export class HTMLFormatter {
  readonly #builder: FormattedContentBuilder;
  readonly #jsFormatter: JavaScriptFormatter;
  readonly #jsonFormatter: JSONFormatter;
  readonly #cssFormatter: CSSFormatter;
  #text?: string;
  #lineEndings?: number[];
  #model?: HTMLModel;

  /**
   * @param builder Receives every token and layout instruction emitted by
   *     this formatter and by the nested language formatters.
   */
  constructor(builder: FormattedContentBuilder) {
    this.#builder = builder;
    this.#jsFormatter = new JavaScriptFormatter(builder);
    this.#jsonFormatter = new JSONFormatter(builder);
    this.#cssFormatter = new CSSFormatter(builder);
  }

  /**
   * Formats `text` into the builder.
   *
   * @param text The HTML source.
   * @param lineEndings Forwarded unchanged to the nested formatters.
   */
  format(text: string, lineEndings: number[]): void {
    this.#text = text;
    this.#lineEndings = lineEndings;
    this.#model = new HTMLModel(text);
    this.#walk(this.#model.document());
  }

  /**
   * Consumes and formats model tokens, on behalf of `element`, for as long
   * as the next token starts before `offset`.
   */
  #formatTokensTill(element: FormatterElement, offset: number): void {
    const model = this.#model;
    if (!model) {
      return;
    }

    for (let upcoming = model.peekToken(); upcoming && upcoming.startOffset < offset; upcoming = model.peekToken()) {
      this.#formatToken(element, model.nextToken() as Token);
    }
  }

  /**
   * Depth-first traversal: flushes tokens up to each tag boundary and calls
   * the layout hooks around the open and close tags.
   *
   * @throws Error when the element lacks an open or a close tag.
   */
  #walk(element: FormatterElement): void {
    const {openTag, closeTag, parent} = element;
    if (!openTag || !closeTag) {
      throw new Error('Element is missing open or close tag');
    }

    // Anything located before this element still belongs to the parent.
    if (parent) {
      this.#formatTokensTill(parent, openTag.startOffset);
    }

    this.#beforeOpenTag(element);
    this.#formatTokensTill(element, openTag.endOffset);
    this.#afterOpenTag(element);

    for (const child of element.children) {
      this.#walk(child);
    }

    this.#formatTokensTill(element, closeTag.startOffset);
    this.#beforeCloseTag(element);
    this.#formatTokensTill(element, closeTag.endOffset);
    this.#afterCloseTag(element);
  }

  /**
   * True when a model is available, `element` has at least one child and it
   * is not the synthetic document root. Only such elements get their own
   * lines and an indented body.
   */
  #needsBlockLayout(element: FormatterElement): boolean {
    const model = this.#model;
    if (!model) {
      return false;
    }
    return element.children.length > 0 && element !== model.document();
  }

  #beforeOpenTag(element: FormatterElement): void {
    if (this.#needsBlockLayout(element)) {
      this.#builder.addNewLine();
    }
  }

  #afterOpenTag(element: FormatterElement): void {
    if (this.#needsBlockLayout(element)) {
      this.#builder.increaseNestingLevel();
      this.#builder.addNewLine();
    }
  }

  #beforeCloseTag(element: FormatterElement): void {
    if (this.#needsBlockLayout(element)) {
      this.#builder.decreaseNestingLevel();
      this.#builder.addNewLine();
    }
  }

  #afterCloseTag(_element: FormatterElement): void {
    this.#builder.addNewLine();
  }

  /**
   * Emits one token. Whitespace-only tokens are dropped, comment and meta
   * tokens get a line to themselves, style/script bodies go to the nested
   * formatters, and everything else is appended as-is (attribute tokens
   * inside a tag are preceded by a soft space).
   */
  #formatToken(element: FormatterElement, token: Token): void {
    const {value, type, startOffset, endOffset} = token;
    if (Platform.StringUtilities.isWhitespace(value)) {
      return;
    }

    const standsAlone = ['comment', 'meta'].some(kind => hasTokenInSet(type, kind));
    if (standsAlone) {
      this.#builder.addNewLine();
      this.#builder.addToken(value.trim(), startOffset);
      this.#builder.addNewLine();
      return;
    }

    const {openTag, closeTag} = element;
    if (!openTag || !closeTag) {
      return;
    }

    // A body token sits between the end of the open tag and the start of the
    // close tag of `element`.
    const isBodyToken = openTag.endOffset <= startOffset && startOffset < closeTag.startOffset;
    const isStyle = element.name === 'style';
    const isScript = element.name === 'script';

    if (isBodyToken && (isStyle || isScript)) {
      this.#builder.addNewLine();
      this.#builder.increaseNestingLevel();
      const text = this.#text || '';
      const lineEndings = this.#lineEndings || [];
      if (isStyle) {
        this.#cssFormatter.format(text, lineEndings, startOffset, endOffset);
      } else if (scriptTagIsJavaScript(element)) {
        this.#jsFormatter.format(text, lineEndings, startOffset, endOffset);
      } else if (scriptTagIsJSON(element)) {
        this.#jsonFormatter.format(text, lineEndings, startOffset, endOffset);
      } else {
        // Unknown script type: keep the body verbatim.
        this.#builder.addToken(value, startOffset);
        this.#builder.addNewLine();
      }
      this.#builder.decreaseNestingLevel();
      return;
    }

    if (!isBodyToken && hasTokenInSet(type, 'attribute')) {
      this.#builder.addSoftSpace();
    }

    this.#builder.addToken(value, startOffset);
  }
}

/** Lower-cased `type` attribute values that mark a script as JavaScript. */
const JavaScriptScriptTypes = new Set<string>([
  'application/ecmascript',
  'application/javascript',
  'application/x-ecmascript',
  'application/x-javascript',
  'module',
  'text/ecmascript',
  'text/javascript',
  'text/javascript1.0',
  'text/javascript1.1',
  'text/javascript1.2',
  'text/javascript1.3',
  'text/javascript1.4',
  'text/javascript1.5',
  'text/jscript',
  'text/livescript',
  'text/x-ecmascript',
  'text/x-javascript',
]);

/**
 * Decides whether the body of a `<script>` element should be formatted as
 * JavaScript.
 *
 * An element without an open tag, or without a non-empty `type` attribute,
 * counts as JavaScript. Otherwise the attribute value is lower-cased,
 * trimmed, stripped of one pair of matching surrounding quotes, and looked
 * up among the known JavaScript types.
 *
 * @param element The script element to inspect.
 * @returns `true` when the script is considered JavaScript.
 */
function scriptTagIsJavaScript(element: FormatterElement): boolean {
  const declaredType = element.openTag?.attributes.get('type');
  if (!declaredType) {
    return true;
  }

  const normalized = declaredType.toLowerCase().trim();
  // The stored attribute value may still carry its surrounding quotes.
  const quoted = /^(["\'])(.*)\1$/.exec(normalized);
  const candidate = (quoted ? quoted[2] : normalized).trim();
  return JavaScriptScriptTypes.has(candidate);
}

/**
 * Decides whether the body of a `<script>` element should be formatted as
 * JSON.
 *
 * The `type` attribute is lower-cased, trimmed and stripped of one pair of
 * matching surrounding quotes. It is JSON when it is `application/json`,
 * `importmap`, `speculationrules`, or any `application/<subtype>+json`
 * structured-suffix media type. The subtype may contain every character
 * allowed in a media-type name (letters, digits and `!#$&-^_.+`), so types
 * such as `application/vnd.api+json` or `application/json-patch+json` are
 * recognised as well as `application/ld+json`.
 *
 * @param element The script element to inspect.
 * @returns `true` when the script is considered JSON; `false` when there is
 *     no open tag, no non-empty `type` attribute, or the type is not JSON.
 */
function scriptTagIsJSON(element: FormatterElement): boolean {
  if (!element.openTag) {
    return false;
  }

  let type = element.openTag.attributes.get('type');
  if (!type) {
    return false;
  }

  type = type.toLowerCase();
  // The stored attribute value may still carry its surrounding quotes.
  const isWrappedInQuotes = /^(["\'])(.*)\1$/.exec(type.trim());
  if (isWrappedInQuotes) {
    type = isWrappedInQuotes[2];
  }
  // `\w+` alone would reject subtypes that contain '.' or '-'.
  const isSubtype = /^application\/[\w!#$&^.+-]+\+json$/.exec(type.trim());
  if (isSubtype) {
    type = 'application/json';
  }
  return [
    'application/json',
    'importmap',
    'speculationrules',
  ].includes(type.trim());
}

/**
 * Checks whether a token's type set contains `type`, either bare or with
 * the `xml-` prefix.
 *
 * The full build prefixes CodeMirror HTML token types with `xml-`. In a
 * worker context the prefix is absent, because the global is only
 * overridden in CodeMirrorTextEditor.js. Both spellings are accepted.
 */
function hasTokenInSet(tokenTypes: Set<string>, type: string): boolean {
  const spellings = [type, `xml-${type}`];
  return spellings.some(spelling => tokenTypes.has(spelling));
}

/**
 * Tokenizes an HTML string and derives a lightweight element tree from the
 * token stream.
 *
 * The constructor does all the work. Afterwards `document()` returns the
 * synthetic root element, and `peekToken()` / `nextToken()` iterate over the
 * recorded tokens in source order.
 */
export class HTMLModel {
  /** Current state of the tag-parsing state machine (see `#updateDOM`). */
  #state: ParseState = ParseState.INITIAL;
  /** Synthetic root spanning the whole text. */
  readonly #document: FormatterElement;
  /** Currently open elements, document first, innermost last. */
  #stack: FormatterElement[];
  /** Every recorded token, in source order. */
  readonly #tokens: Token[] = [];
  /** Read cursor used by `peekToken()` / `nextToken()`. */
  #tokenIndex = 0;
  /** Attributes collected for the tag currently being parsed. */
  #attributes = new Map<string, string>();
  #attributeName = '';
  #tagName = '';
  #isOpenTag = false;
  #tagStartOffset?: number|null;
  #tagEndOffset?: number|null;

  /**
   * @param text The HTML source to tokenize.
   */
  constructor(text: string) {
    this.#document = new FormatterElement('document');
    // The root gets zero-width tags at the very start and very end of the text.
    this.#document.openTag = new Tag('document', 0, 0, new Map(), true, false);
    this.#document.closeTag = new Tag('document', text.length, text.length, new Map(), false, false);

    this.#stack = [this.#document];

    this.#build(text);
  }

  /**
   * Runs the HTML tokenizer over `text`, records the tokens and updates the
   * element tree. Tokenization is aborted right after a `<script>` or
   * `<style>` open tag; the element body up to the matching `</name` is then
   * recorded as one untyped token and tokenization resumes from there.
   */
  #build(text: string): void {
    const tokenizer = createTokenizer('text/html');
    // `baseOffset` is where the current tokenizer run started within `text`;
    // `lastOffset` is the absolute end offset of the last processed token.
    let baseOffset = 0, lastOffset = 0;
    let pendingToken: Token|null = null;

    const pushToken = (token: Token): Object|undefined => {
      this.#tokens.push(token);
      this.#updateDOM(token);

      // Stop tokenizing once the open tag of a script/style element has just
      // been completed; its body is handled by the outer loop below.
      const element = this.#stack[this.#stack.length - 1];
      if (element && (element.name === 'script' || element.name === 'style') &&
          element.openTag?.endOffset === lastOffset) {
        return AbortTokenization;
      }

      return;
    };

    const processToken = (
        tokenValue: string,
        type: string|null,
        tokenStart: number,
        tokenEnd: number,
        ): Object|undefined => {
      // The tokenizer reports offsets relative to the substring it was given.
      tokenStart += baseOffset;
      tokenEnd += baseOffset;
      lastOffset = tokenEnd;

      const tokenType = type ? new Set<string>(type.split(' ')) : new Set<string>();
      const token = new Token(tokenValue, tokenType, tokenStart, tokenEnd);

      // This is a pretty horrible work-around for two bugs in the CodeMirror 5 HTML
      // tokenizer, which aren't easy to fix because it shares this code with the
      // XML parser[^1], and which is also not actively maintained anymore. The
      // real fix here is to migrate off of CodeMirror 5 also for formatting and
      // pretty printing and use CodeMirror 6 instead, but that's a bigger
      // project.
      //
      // For now we duct-tape the first problem by merging a '/' token
      // following a string token in the HTML formatter, which does the trick, and
      // also merging the error tokens for unescaped ampersands with text tokens
      // (where `type` is `null`) preceding and following the error tokens.
      //
      // [^1]: https://github.com/codemirror/codemirror5/blob/742627a/mode/xml/xml.js#L137
      //
      if (pendingToken) {
        if (tokenValue === '/' && type === 'attribute' && pendingToken.type.has('string')) {
          // Fold the stray '/' into the preceding string token.
          token.startOffset = pendingToken.startOffset;
          token.value = `${pendingToken.value}${tokenValue}`;
          token.type = pendingToken.type;
        } else if (
            (tokenValue.startsWith('&') && type === 'error' && pendingToken.type.size === 0) ||
            (type === null && pendingToken.type.has('error'))) {
          // Extend the pending token and keep it pending, so that further
          // text/error tokens can be merged into it.
          pendingToken.endOffset = token.endOffset;
          pendingToken.value += tokenValue;
          pendingToken.type = token.type;
          return;
        } else if (pushToken(pendingToken) === AbortTokenization) {
          return AbortTokenization;
        }
        pendingToken = null;
      }
      // String and untyped tokens are held back one step so they can be merged
      // with the token that follows.
      if (type === 'string' || type === null) {
        pendingToken = token;
        return;
      }

      return pushToken(token);
    };

    while (true) {
      baseOffset = lastOffset;
      tokenizer(text.substring(lastOffset), processToken);
      // Flush a token that was still held back when the tokenizer run ended.
      if (pendingToken) {
        pushToken(pendingToken);
        pendingToken = null;
      }
      if (lastOffset >= text.length) {
        break;
      }
      const element = this.#stack[this.#stack.length - 1];
      if (!element) {
        break;
      }

      // Find the next '</' that is followed (case-insensitively) by the name
      // of the innermost open element, or fall back to the end of the text.
      while (true) {
        lastOffset = text.indexOf('</', lastOffset);
        if (lastOffset === -1) {
          lastOffset = text.length;
          break;
        }
        if (text.substring(lastOffset + 2).toLowerCase().startsWith(element.name)) {
          break;
        }
        lastOffset += 2;
      }

      if (!element.openTag) {
        break;
      }

      // Record the whole element body as a single untyped token. It is not
      // passed through #updateDOM, so it never affects the element tree.
      const tokenStart = element.openTag.endOffset;
      const tokenEnd = lastOffset;
      const tokenValue = text.substring(tokenStart, tokenEnd);
      this.#tokens.push(new Token(tokenValue, new Set(), tokenStart, tokenEnd));
    }

    // Give every element that is still open a zero-width close tag at the
    // end of the text.
    while (this.#stack.length > 1) {
      const element = this.#stack[this.#stack.length - 1];
      if (!element) {
        break;
      }

      this.#popElement(new Tag(element.name, text.length, text.length, new Map(), false, false));
    }
  }

  /**
   * Advances the tag-parsing state machine by one token. Tokens that do not
   * fit the current state are ignored.
   */
  #updateDOM(token: Token): void {
    const value = token.value;
    const type = token.type;
    switch (this.#state) {
      case ParseState.INITIAL:
        if (hasTokenInSet(type, 'bracket') && (value === '<' || value === '</')) {
          this.#onStartTag(token);
          this.#state = ParseState.TAG;
        }
        return;
      case ParseState.TAG:
        if (hasTokenInSet(type, 'tag') && !hasTokenInSet(type, 'bracket')) {
          this.#tagName = value.trim().toLowerCase();
        } else if (hasTokenInSet(type, 'attribute')) {
          this.#attributeName = value.trim().toLowerCase();
          // Registered with an empty value until (and unless) a value follows.
          this.#attributes.set(this.#attributeName, '');
          this.#state = ParseState.ATTRIBUTE_NAME;
        } else if (hasTokenInSet(type, 'bracket') && (value === '>' || value === '/>')) {
          this.#onEndTag(token);
          this.#state = ParseState.INITIAL;
        }
        return;
      case ParseState.ATTRIBUTE_NAME:
        if (!type.size && value === '=') {
          this.#state = ParseState.ATTRIBUTE_VALUE;
        } else if (hasTokenInSet(type, 'bracket') && (value === '>' || value === '/>')) {
          this.#onEndTag(token);
          this.#state = ParseState.INITIAL;
        }
        return;
      case ParseState.ATTRIBUTE_VALUE:
        if (hasTokenInSet(type, 'string')) {
          // The value is stored exactly as tokenized.
          this.#attributes.set(this.#attributeName, value);
          this.#state = ParseState.TAG;
        } else if (hasTokenInSet(type, 'bracket') && (value === '>' || value === '/>')) {
          this.#onEndTag(token);
          this.#state = ParseState.INITIAL;
        }
        return;
    }
  }

  /** Resets the per-tag scratch state when a '<' or '</' bracket is seen. */
  #onStartTag(token: Token): void {
    this.#tagName = '';
    this.#tagStartOffset = token.startOffset;
    this.#tagEndOffset = null;
    this.#attributes = new Map();
    this.#attributeName = '';
    this.#isOpenTag = token.value === '<';
  }

  /** Builds a `Tag` from the scratch state when '>' or '/>' is seen. */
  #onEndTag(token: Token): void {
    this.#tagEndOffset = token.endOffset;
    const selfClosingTag = token.value === '/>' || SelfClosingTags.has(this.#tagName);
    const tag = new Tag(
        this.#tagName, this.#tagStartOffset || 0, this.#tagEndOffset, this.#attributes, this.#isOpenTag,
        selfClosingTag);
    this.#onTagComplete(tag);
  }

  /**
   * Applies a completed tag to the stack of open elements.
   *
   * Open tag: the innermost element is closed first when it is self-closing,
   * or when `AutoClosingTags` says the new tag implicitly closes it. The new
   * element is then pushed.
   *
   * Close tag: open elements are closed until one with the same name is on
   * top, and that one is closed with `tag`. When no open element matches,
   * every element except the document ends up closed.
   */
  #onTagComplete(tag: Tag): void {
    if (tag.isOpenTag) {
      const topElement = this.#stack[this.#stack.length - 1];
      if (topElement) {
        const tagSet = AutoClosingTags.get(topElement.name);
        if (topElement !== this.#document && topElement.openTag?.selfClosingTag) {
          this.#popElement(autocloseTag(topElement, topElement.openTag.endOffset));
        } else if (tagSet?.has(tag.name)) {
          this.#popElement(autocloseTag(topElement, tag.startOffset));
        }
        this.#pushElement(tag);
      }
      return;
    }

    let lastTag = this.#stack[this.#stack.length - 1];
    while (this.#stack.length > 1 && lastTag && lastTag.name !== tag.name) {
      this.#popElement(autocloseTag(lastTag, tag.startOffset));
      lastTag = this.#stack[this.#stack.length - 1];
    }
    // Only the document is left: it is never popped by a close tag.
    if (this.#stack.length === 1) {
      return;
    }
    this.#popElement(tag);

    // Zero-width synthetic close tag for `element` at `offset`.
    function autocloseTag(element: FormatterElement, offset: number): Tag {
      return new Tag(element.name, offset, offset, new Map(), false, false);
    }
  }

  /** Pops the innermost open element and records `closeTag` on it. */
  #popElement(closeTag: Tag): void {
    const element = this.#stack.pop();
    if (!element) {
      return;
    }
    element.closeTag = closeTag;
  }

  /** Creates an element for `openTag` as a child of the innermost open element. */
  #pushElement(openTag: Tag): void {
    const topElement = this.#stack[this.#stack.length - 1];
    const newElement = new FormatterElement(openTag.name);
    if (topElement) {
      newElement.parent = topElement;
      topElement.children.push(newElement);
    }
    newElement.openTag = openTag;
    this.#stack.push(newElement);
  }

  /**
   * @returns The token at the read cursor without consuming it, or `null`
   *     when all tokens have been consumed.
   */
  peekToken(): Token|null {
    return this.#tokenIndex < this.#tokens.length ? this.#tokens[this.#tokenIndex] : null;
  }

  /**
   * Consumes one token and advances the read cursor.
   *
   * @returns The consumed token, or `null` (not `undefined`) when all tokens
   *     have already been consumed.
   */
  nextToken(): Token|null {
    return this.#tokens[this.#tokenIndex++] ?? null;
  }

  /** @returns The synthetic root element of the tree. */
  document(): FormatterElement {
    return this.#document;
  }
}

/**
 * Tag names that are treated as self-closing even when the tag is not
 * written with a trailing `/>`.
 */
const SelfClosingTags = new Set<string>([
  'area', 'base', 'br', 'col', 'command', 'embed', 'hr', 'img',
  'input', 'keygen', 'link', 'meta', 'param', 'source', 'track', 'wbr',
]);

/**
 * Maps the name of an open element to the names of the start tags that
 * implicitly close it.
 *
 * @see https://www.w3.org/TR/html/syntax.html 8.1.2.4 Optional tags
 */
const AutoClosingTags = new Map<string, Set<string>>(
    Object
        .entries({
          head: ['body'],
          li: ['li'],
          dt: ['dt', 'dd'],
          dd: ['dt', 'dd'],
          p: [
            'address', 'article', 'aside', 'blockquote', 'div', 'dl',      'fieldset', 'footer', 'form',
            'h1',      'h2',      'h3',    'h4',         'h5',  'h6',      'header',   'hgroup', 'hr',
            'main',    'nav',     'ol',    'p',          'pre', 'section', 'table',    'ul',
          ],
          rb: ['rb', 'rt', 'rtc', 'rp'],
          rt: ['rb', 'rt', 'rtc', 'rp'],
          rtc: ['rb', 'rtc', 'rp'],
          rp: ['rb', 'rt', 'rtc', 'rp'],
          optgroup: ['optgroup'],
          option: ['option', 'optgroup'],
          colgroup: ['colgroup'],
          thead: ['tbody', 'tfoot'],
          tbody: ['tbody', 'tfoot'],
          tfoot: ['tbody'],
          tr: ['tr'],
          td: ['td', 'th'],
          th: ['td', 'th'],
        })
        .map(([openName, closers]): [string, Set<string>] => [openName, new Set(closers)]),
);

/**
 * States of the tag-parsing state machine that HTMLModel drives from the
 * token stream.
 */
const enum ParseState {
  /** Outside of any tag; waiting for a '<' or '</' bracket token. */
  INITIAL = 'Initial',
  /** Inside a tag; expecting the tag name, an attribute name, or '>' / '/>'. */
  TAG = 'Tag',
  /** An attribute name was just read; expecting '=' or '>' / '/>'. */
  ATTRIBUTE_NAME = 'AttributeName',
  /** An '=' was just read; expecting a string value or '>' / '/>'. */
  ATTRIBUTE_VALUE = 'AttributeValue',
}

/**
 * One tokenizer token: its text, its set of type names, and the half-open
 * `[startOffset, endOffset)` range it covers in the source text. All fields
 * are mutable because neighbouring tokens may be merged after creation.
 */
class Token {
  constructor(
      public value: string,
      public type: Set<string>,
      public startOffset: number,
      public endOffset: number,
  ) {
  }
}

/**
 * An open or close tag: its name, the source range it covers, the
 * attributes collected for it, whether it is an open tag, and whether it is
 * self-closing.
 */
class Tag {
  constructor(
      public name: string,
      public startOffset: number,
      public endOffset: number,
      public attributes: Map<string, string>,
      public isOpenTag: boolean,
      public selfClosingTag: boolean,
  ) {
  }
}

/**
 * A node of the element tree: a name, links to its parent and children, and
 * the open and close tags, which stay `null` until they are assigned.
 */
class FormatterElement {
  children: FormatterElement[] = [];
  parent: FormatterElement|null = null;
  openTag: Tag|null = null;
  closeTag: Tag|null = null;

  constructor(public name: string) {
  }
}
