/**
 * Validation utilities for ensuring English-only content in analysis documents.
 *
 * These validators detect non-English characters (Chinese, Korean, Japanese,
 * etc.) that may be incorrectly generated by LLMs despite prompt instructions.
 */

/**
 * Regex pattern to detect non-English characters. Includes:
 *
 * - Chinese (CJK Unified Ideographs): \u4e00-\u9fff
 * - Korean (Hangul Syllables): \uac00-\ud7af
 * - Japanese Hiragana: \u3040-\u309f
 * - Japanese Katakana: \u30a0-\u30ff
 * - CJK Extension A: \u3400-\u4dbf
 * - CJK Compatibility Ideographs: \uf900-\ufaff
 *
 * NOTE: the pattern carries the `g` flag, so calling `test()` or `exec()`
 * directly on this shared instance advances its `lastIndex` and makes the
 * next call start mid-string. Either use it through an API that ignores
 * `lastIndex`, or build a fresh RegExp from `.source`.
 */
const NON_ENGLISH_PATTERN =
  /[\u4e00-\u9fff\uac00-\ud7af\u3040-\u309f\u30a0-\u30ff\u3400-\u4dbf\uf900-\ufaff]/g;

/**
 * Check if text contains non-English characters.
 *
 * @param text - The string to inspect.
 * @returns `true` when at least one character of `text` falls in the ranges
 *   covered by the non-English pattern, `false` otherwise (including for the
 *   empty string).
 */
export const containsNonEnglish = (text: string): boolean => {
  // `search` always scans from index 0 and leaves the regex's `lastIndex`
  // unchanged, so repeated calls give the same answer. `test()` on the shared
  // global regex would resume from the previous match position instead.
  return text.search(NON_ENGLISH_PATTERN) !== -1;
};

/**
 * Locate every character in `text` that matches the non-English pattern.
 *
 * @param text - The string to scan.
 * @returns One entry per match, in order of appearance. Each entry holds the
 *   matched character, its index within `text`, and a context slice running
 *   from up to 20 characters before the match through up to 20 characters
 *   after it (clamped to the bounds of `text`).
 */
export const findNonEnglishCharacters = (
  text: string,
): Array<{ char: string; index: number; context: string }> => {
  // Private global copy of the pattern so the scan position is never shared
  // with other users of the module-level regex.
  const scanner = new RegExp(NON_ENGLISH_PATTERN.source, "g");
  const hits: Array<{ char: string; index: number; context: string }> = [];

  for (let hit = scanner.exec(text); hit !== null; hit = scanner.exec(text)) {
    const position = hit.index;
    const windowStart = Math.max(0, position - 20);
    const windowEnd = Math.min(text.length, position + 21);

    hits.push({
      char: hit[0],
      index: position,
      context: text.slice(windowStart, windowEnd),
    });
  }

  return hits;
};

/**
 * Validate that content is English-only.
 *
 * @param content - The text to check.
 * @returns `{ valid: true, errors: [] }` when no non-English character is
 *   found. Otherwise `valid` is `false` and `errors` describes the first five
 *   offending characters (character, index, surrounding context), followed by
 *   a single summary line when more than five were found.
 */
export const validateEnglishOnly = (
  content: string,
): { valid: boolean; errors: string[] } => {
  const offenders = findNonEnglishCharacters(content);
  const total = offenders.length;

  if (total === 0) {
    return { valid: true, errors: [] };
  }

  const errors: string[] = [];

  // Only the first five occurrences are reported individually.
  for (const item of offenders.slice(0, 5)) {
    errors.push(
      `Non-English character "${item.char}" found at index ${item.index}: "...${item.context}..."`,
    );
  }

  if (total > 5) {
    const remaining = total - 5;
    errors.push(`... and ${remaining} more non-English characters`);
  }

  return { valid: false, errors };
};

/**
 * Validate module section content for English-only requirement.
 *
 * Checks the `title`, `purpose`, and `content` of every section, in that
 * order, and collects one error line per failing field.
 *
 * @param sections - Module sections to validate.
 * @returns `valid` is `true` only when no field of any section produced an
 *   error; `errors` lists the failures prefixed with the section index and
 *   field name.
 */
export const validateModuleSectionContent = (
  sections: Array<{ title: string; purpose: string; content: string }>,
): { valid: boolean; errors: string[] } => {
  const errors = sections.reduce<string[]>(
    (collected, { title, purpose, content }, position) => {
      const titleCheck = validateEnglishOnly(title);
      if (!titleCheck.valid) {
        collected.push(
          `Module section ${position} title: ${titleCheck.errors.join("; ")}`,
        );
      }

      const purposeCheck = validateEnglishOnly(purpose);
      if (!purposeCheck.valid) {
        collected.push(
          `Module section ${position} purpose: ${purposeCheck.errors.join("; ")}`,
        );
      }

      const contentCheck = validateEnglishOnly(content);
      if (!contentCheck.valid) {
        collected.push(
          `Module section ${position} content: ${contentCheck.errors.join("; ")}`,
        );
      }

      return collected;
    },
    [],
  );

  return { valid: errors.length === 0, errors };
};

/**
 * Validate unit section content for English-only requirement.
 *
 * For every section the `title`, `purpose`, and `content` fields are checked
 * in that order, followed by each entry of `keywords`.
 *
 * @param sections - Unit sections to validate.
 * @returns `valid` is `true` only when nothing failed; `errors` lists each
 *   failure prefixed with the section index and the field (or keyword index)
 *   it came from.
 */
export const validateUnitSectionContent = (
  sections: Array<{
    title: string;
    purpose: string;
    content: string;
    keywords: string[];
  }>,
): { valid: boolean; errors: string[] } => {
  const errors = sections.reduce<string[]>(
    (collected, { title, purpose, content, keywords }, position) => {
      const titleCheck = validateEnglishOnly(title);
      if (!titleCheck.valid) {
        collected.push(
          `Unit section ${position} title: ${titleCheck.errors.join("; ")}`,
        );
      }

      const purposeCheck = validateEnglishOnly(purpose);
      if (!purposeCheck.valid) {
        collected.push(
          `Unit section ${position} purpose: ${purposeCheck.errors.join("; ")}`,
        );
      }

      const contentCheck = validateEnglishOnly(content);
      if (!contentCheck.valid) {
        collected.push(
          `Unit section ${position} content: ${contentCheck.errors.join("; ")}`,
        );
      }

      // Keywords are reported after the section's own fields.
      keywords.forEach((keyword, keywordPosition) => {
        const keywordCheck = validateEnglishOnly(keyword);
        if (keywordCheck.valid) {
          return;
        }
        collected.push(
          `Unit section ${position} keyword ${keywordPosition}: ${keywordCheck.errors.join("; ")}`,
        );
      });

      return collected;
    },
    [],
  );

  return { valid: errors.length === 0, errors };
};

/**
 * Validate section section content for English-only requirement.
 *
 * Checks the `title` and then the `content` of every entry.
 *
 * @param sections - Sections to validate.
 * @returns `valid` is `true` only when no field produced an error; `errors`
 *   lists each failure prefixed with the section index and field name.
 */
export const validateSectionSectionContent = (
  sections: Array<{ title: string; content: string }>,
): { valid: boolean; errors: string[] } => {
  const errors = sections.reduce<string[]>(
    (collected, { title, content }, position) => {
      const titleCheck = validateEnglishOnly(title);
      if (!titleCheck.valid) {
        collected.push(
          `Section ${position} title: ${titleCheck.errors.join("; ")}`,
        );
      }

      const contentCheck = validateEnglishOnly(content);
      if (!contentCheck.valid) {
        collected.push(
          `Section ${position} content: ${contentCheck.errors.join("; ")}`,
        );
      }

      return collected;
    },
    [],
  );

  return { valid: errors.length === 0, errors };
};

/**
 * Validate scenario file names for correct format. Expected format: 00-toc.md,
 * 01-xxx.md, 02-xxx.md, ...
 *
 * Three rules are applied:
 *
 * 1. When the list is non-empty, the first file must be exactly "00-toc.md".
 * 2. Every filename must be two digits, a hyphen, a lowercase letter followed
 *    by lowercase letters / digits / hyphens, and the ".md" extension.
 * 3. Every filename must start with its zero-padded list position and a hyphen.
 *
 * @param files - Scenario files in their intended order.
 * @returns `valid` is `true` when no rule was violated. `errors` lists the
 *   first-file error (if any), then all format errors, then all numbering
 *   errors.
 */
export const validateScenarioFileNames = (
  files: Array<{ filename: string }>,
): { valid: boolean; errors: string[] } => {
  const nameShape = /^\d{2}-[a-z][a-z0-9-]*\.md$/;

  const firstFileErrors: string[] = [];
  const formatErrors: string[] = [];
  const numberingErrors: string[] = [];

  // Rule 1: the table of contents comes first.
  if (files.length !== 0) {
    const leading = files[0].filename;
    if (leading !== "00-toc.md") {
      firstFileErrors.push(`First file must be "00-toc.md", got "${leading}"`);
    }
  }

  // Rules 2 and 3 are evaluated together but kept in separate buckets so the
  // final list still groups all format errors ahead of all numbering errors.
  files.forEach(({ filename }, position) => {
    if (!nameShape.test(filename)) {
      formatErrors.push(
        `File ${position}: Invalid filename format "${filename}". Expected format: XX-name.md`,
      );
    }

    const expectedPrefix = String(position).padStart(2, "0");
    if (!filename.startsWith(`${expectedPrefix}-`)) {
      numberingErrors.push(
        `File ${position}: Expected prefix "${expectedPrefix}-", got "${filename}"`,
      );
    }
  });

  const errors = [...firstFileErrors, ...formatErrors, ...numberingErrors];

  return { valid: errors.length === 0, errors };
};
