import striptags from 'striptags';

/**
 * A single normalization step: receives a definition string and returns the transformed string.
 */
type Normalize = (definition: string) => string;

/**
 * Tag names passed to `striptags` as the allowed list in the first pass of `normalizeHtmlTags`;
 * whatever tags survive that pass are then replaced with a double quote (`"`) in its second pass.
 */
const EMPHASIS_TAGS = ['a', 'b', 'em', 'internalXref'];

/**
 * Strips markup in two `striptags` passes: the first keeps only the tags listed in
 * `EMPHASIS_TAGS`, the second replaces every tag that is still present with a double quote.
 */
const normalizeHtmlTags: Normalize = (definition) => {
  const onlyEmphasisTags = striptags(definition, EMPHASIS_TAGS);
  return striptags(onlyEmphasisTags, undefined, '"');
};

/**
 * Rewrites every `\r\n` sequence as a single `\n`. A lone `\r` is left untouched here.
 */
const normalizeLineBreaks: Normalize = (definition) => {
  const windowsLineBreak = /\r\n/g;
  return definition.replace(windowsLineBreak, '\n');
};

/**
 * Collapses horizontal whitespace while leaving `\n` characters in place:
 * first every run of whitespace other than `\n` and the plain space becomes one space,
 * then every run of plain spaces is squeezed to a single space.
 */
const normalizeWhitespace: Normalize = (definition) => {
  const withPlainSpaces = definition.replace(/[^\S\n ]+/g, ' ');
  return withPlainSpaces.replace(/[ ]+/g, ' ');
};

/**
 * Moves a period that sits directly before a double quote to directly after it:
 * `."` -> `".`
 */
const normalizeQuotes: Normalize = (definition) => {
  const periodBeforeQuote = /\."/g;
  return definition.replace(periodBeforeQuote, '".');
};

/**
 * Drops a leading `(digits.digits)` marker together with the whitespace that follows it.
 * The marker must start the string and be followed by at least one whitespace character.
 *
 * `(1.2) definition` -> `definition`
 */
const normalizeMarkers: Normalize = (definition) => {
  const leadingMarker = /^\(\d+\.\d+\)\s+/g;
  return definition.replace(leadingMarker, '');
};

/**
 * Trims surrounding whitespace, then removes one colon from the very end of the string if present.
 */
const normalizeTrailingSymbols: Normalize = (definition) => {
  const trimmed = definition.trim();
  return trimmed.replace(/:$/, '');
};

/**
 * Trims surrounding whitespace, then removes one colon from the very start of the string if present.
 */
const normalizeLeadingSymbols: Normalize = (definition) => {
  const trimmed = definition.trim();
  return trimmed.replace(/^:/, '');
};

/**
 * Returns the definition unchanged when it contains at least one `\w` character or a character
 * in the range U+0621–U+064A; otherwise returns an empty string.
 *
 * @see https://stackoverflow.com/a/40732368
 */
const normalizeNonWords: Normalize = (definition) => {
  const hasWordCharacter = /[\w\u0621-\u064A]+/.test(definition);
  if (!hasWordCharacter) {
    return '';
  }
  return definition;
};

/**
 * Tidies commas in three steps:
 * 1. a comma with whitespace on both sides becomes `, `;
 * 2. a comma at the very start of the string is removed;
 * 3. a comma at the very end of the string is removed.
 */
const normalizeCommas: Normalize = (definition) => {
  const evenlySpaced = definition.replace(/\s+,\s+/g, ', ');
  const withoutLeadingComma = evenlySpaced.replace(/^,/, '');
  return withoutLeadingComma.replace(/,$/, '');
};

/**
 * The normalization pipeline. `normalizeDefinition` folds a definition through these
 * functions from first to last, feeding each one the previous one's output.
 */
const normalizers: Normalize[] = [
  normalizeHtmlTags,
  normalizeMarkers,
  normalizeQuotes,
  normalizeLineBreaks,
  normalizeWhitespace,
  normalizeTrailingSymbols,
  normalizeLeadingSymbols,
  normalizeNonWords,
  normalizeCommas,
  // Final step: strip any leading/trailing whitespace left behind by the steps above.
  (definition) => definition.trim(),
];

/**
 * Normalizes a definition by running it through every function in `normalizers`, in order,
 * and repeating the whole pipeline until a pass leaves the string unchanged.
 *
 * The repetition is done with a loop rather than self-recursion: several normalizers remove
 * only one character per pass (e.g. a single trailing `:`), so the number of passes can grow
 * with the input length, and a recursive formulation would use one stack frame per pass.
 *
 * @param definition - The raw definition text.
 * @returns The definition after the pipeline has reached a fixed point.
 */
export const normalizeDefinition = (definition: string): string => {
  let previous: string;
  let current = definition;

  do {
    previous = current;
    current = normalizers.reduce((result, normalize) => normalize(result), previous);
  } while (current !== previous);

  return current;
};
