// SPDX-License-Identifier: Apache-2.0

import AdmZip from 'adm-zip';
import fs from 'node:fs';
import path from 'node:path';
import chalk from 'chalk';
import * as constants from '../../core/constants.js';
import {PathEx} from '../../business/utils/path-ex.js';
import {type SoloLogger} from '../../core/logging/solo-logger.js';

const {green, yellow} = chalk;

/**
 * Severity-ordered categories for diagnostics findings.
 *
 * Ordering (lowest value = highest severity in the report):
 *   1. image-pull       — container image could not be pulled; pod will never start.
 *   2. oom              — container was killed by the kernel due to memory exhaustion.
 *   3. pod-readiness    — pod is not Running or its readiness probe is failing.
 *   4. consensus-active — consensus node did not reach ACTIVE platform status.
 *   5. log-exception    — an exception/stack-trace was found in an application log.
 *   6. app-error        — an ERROR/FATAL line was found in a component pod's raw
 *                         container log, or an `] ERROR:` entry was found in solo.log.
 */
export type DiagnosticsFindingCategory =
  | 'image-pull'
  | 'oom'
  | 'pod-readiness'
  | 'consensus-active'
  | 'log-exception'
  | 'app-error';

/** A single detected problem with its supporting evidence lines. */
export type DiagnosticsFinding = {
  /** Failure class; determines the position of the finding in the rendered report. */
  category: DiagnosticsFindingCategory;
  /** One-line human-readable summary of the problem. */
  title: string;
  /** Relative path of the source file (or "archive:entry") that triggered this finding. */
  source: string;
  /**
   * Supporting lines for the finding (the analyzer stores at most 14 per finding).
   * Most entries are source lines rendered as `line <N>: <trimmed text>`; exception
   * blocks are stored as raw log lines, and pod-readiness findings may start with
   * synthesized `Status: …` / `Ready: False` entries.
   */
  evidence: string[];
};

/** Describes one log file that is inspected inside a consensus node log archive. */
type ConsensusLogDefinition = {
  /** Suffix an archive entry name must end with to be treated as this log. */
  entrySuffix: 'output/swirlds.log' | 'output/hgcaa.log';
  /** Short file name used in finding titles. */
  displayName: 'swirlds.log' | 'hgcaa.log';
  /** When true, the log is also checked for the presence of the `ACTIVE` status marker. */
  checkConsensusActive: boolean;
};

/**
 * DiagnosticsAnalyzer scans a previously-collected diagnostics output directory
 * (produced by `deployment diagnostics logs`) and identifies common failure
 * signatures without requiring a live cluster connection.
 *
 * ## Input sources
 *
 * ### 1. Solo CLI log  (`solo.log`)
 * The Solo CLI's own Pino log file (`~/.solo/logs/solo.log` by default, or
 * `solo.log` found recursively under `customOutputDirectory`).  Lines
 * matching `] ERROR:` are captured as `app-error` findings.  ANSI escape
 * codes and `[traceId="..."]` suffixes are stripped before matching.
 *
 * ### 2. Pod describe files  (`*.describe.txt`)
 * Written by `downloadHieroComponentLogs()` for every pod across all clusters.
 * These are the output of `kubectl describe pod <name> -n <namespace>` and
 * contain the pod's status, container states, events, and resource usage.
 *
 * Detectable errors:
 *
 * | Category        | Detected keywords / conditions                                                         |
 * |-----------------|----------------------------------------------------------------------------------------|
 * | `image-pull`    | `ErrImagePull`, `ImagePullBackOff`, `Back-off pulling image`,                          |
 * |                 | `failed to pull and unpack image`, `unexpected EOF` (truncated layer),                 |
 * |                 | `toomanyrequests`, `rate limit exceeded`, `429 Too Many Requests`                      |
 * | `oom`           | `OOMKilled`, `out of memory`, `reason: OOMKilled`                                      |
 * | `pod-readiness` | Pod `Status` field is not `Running`, or `Ready: False` is present in container status; |
 * |                 | supporting `Reason:` / `Message:` lines are captured as evidence                       |
 *
 * ### 3. Component pod logs  (`mirror*.log`, `block*.log`, `relay*.log`, `explorer*.log`, `solo-shared*.log`)
 * Raw container logs stored alongside the describe files.  A file containing
 * the word `ERROR` or `FATAL` (case-insensitive) yields one `app-error`
 * finding with up to 8 matching lines as evidence.
 *
 * ### 4. Consensus node log archives  (`*-log-config.zip`)
 * Written by `getNodeLogsAndConfigs()` under `~/.solo/logs/<namespace>/`.
 * Each zip contains the node's log and config snapshot.  Only two log files
 * inside the archive are inspected:
 *
 * - `output/swirlds.log` — Hashgraph platform log
 * - `output/hgcaa.log`   — Hedera application log
 *
 * Detectable errors:
 *
 * | Category           | Detected keywords / conditions                                                      |
 * |--------------------|-------------------------------------------------------------------------------------|
 * | `consensus-active` | `swirlds.log` never contains the word `ACTIVE` — the node stalled during           |
 * |                    | startup (e.g. stuck in `STARTING_UP`, `OBSERVING`, or `REPLAYING_EVENTS`);         |
 * |                    | status-transition lines are captured as evidence                                    |
 * | `log-exception`    | Any line in `swirlds.log` or `hgcaa.log` matching `Exception`, `Error`,            |
 * |                    | or `Caused by:` whose governing log entry is at ERROR/FATAL/SEVERE level —         |
 * |                    | the first matching stack-trace block (up to 14 lines) is captured as evidence      |
 *
 * ## Output
 * All findings are written to `diagnostics-analysis.txt` inside the input
 * directory.  Up to 10 findings are also printed to the terminal in severity
 * order.  Duplicate findings (same category + title + source) are suppressed.
 */
export class DiagnosticsAnalyzer {
  /** Log files inspected inside each `*-log-config.zip` archive. */
  private static readonly CONSENSUS_LOG_DEFINITIONS: readonly ConsensusLogDefinition[] = [
    {entrySuffix: 'output/swirlds.log', displayName: 'swirlds.log', checkConsensusActive: true},
    {entrySuffix: 'output/hgcaa.log', displayName: 'hgcaa.log', checkConsensusActive: false},
  ];

  /** Report position per category; a lower value is listed first. */
  private static readonly SEVERITY_ORDER: Readonly<Record<DiagnosticsFindingCategory, number>> = {
    'image-pull': 1,
    oom: 2,
    'pod-readiness': 3,
    'consensus-active': 4,
    'log-exception': 5,
    'app-error': 6,
  };

  /** Human-readable category names used in the written report. */
  private static readonly CATEGORY_LABELS: Readonly<Record<DiagnosticsFindingCategory, string>> = {
    'image-pull': 'Image Pull',
    oom: 'Out Of Memory',
    'pod-readiness': 'Pod Readiness',
    'consensus-active': 'Consensus Active State',
    'log-exception': 'Exception Stack',
    'app-error': 'Application Error',
  };

  /** Name of the report file written into the analyzed directory. */
  private static readonly REPORT_FILE_NAME: string = 'diagnostics-analysis.txt';

  /** Maximum number of evidence lines stored per finding. */
  private static readonly MAX_EVIDENCE_LINES: number = 14;

  /** Maximum number of findings echoed to the terminal. */
  private static readonly MAX_TERMINAL_FINDINGS: number = 10;

  /** Marks a Pino ERROR entry in solo.log, e.g. `[17:25:23.788] ERROR: ...`. */
  private static readonly SOLO_LOG_ERROR_PATTERN: RegExp = /\]\s+ERROR:/;

  public constructor(private readonly logger: SoloLogger) {}

  /**
   * Run the full analysis against `customOutputDirectory` (or the default
   * `~/.solo/logs/hiero-components-logs` when empty), write the report file
   * into that directory, and print a short summary to the terminal.
   *
   * Consensus node zip archives are looked up under `customOutputDirectory`
   * when it is provided; otherwise under `~/.solo/logs/<namespaceName>/` when
   * `namespaceName` is provided, or directly under `~/.solo/logs/`.
   *
   * @param customOutputDirectory - directory to analyze; empty string selects the default
   * @param namespaceName - namespace used to locate consensus archives in the default layout
   */
  public analyze(customOutputDirectory: string, namespaceName: string | undefined): void {
    const hieroOutputDirectory: string = customOutputDirectory
      ? path.resolve(customOutputDirectory)
      : PathEx.join(constants.SOLO_LOGS_DIR, 'hiero-components-logs');
    const findings: DiagnosticsFinding[] = [];

    this.logger.showUser(`Scanning directory: ${hieroOutputDirectory}`);

    // Checked once: nothing below creates this directory before the final mkdir.
    const outputDirectoryExists: boolean = fs.existsSync(hieroOutputDirectory);

    if (outputDirectoryExists) {
      this.analyzeDescribeFiles(hieroOutputDirectory, findings);
    } else {
      this.logger.showUser(yellow(`  Pod describe directory not found, skipping: ${hieroOutputDirectory}`));
    }

    const consensusArchiveDirectory: string = this.resolveConsensusArchiveDirectory(
      customOutputDirectory,
      namespaceName,
    );
    if (fs.existsSync(consensusArchiveDirectory)) {
      this.analyzeConsensusNodeArchives(consensusArchiveDirectory, findings);
    } else {
      this.logger.showUser(yellow(`  Consensus archive directory not found, skipping: ${consensusArchiveDirectory}`));
    }

    if (outputDirectoryExists) {
      this.analyzePodLogFiles(hieroOutputDirectory, findings);
      this.analyzeSoloLogFiles(hieroOutputDirectory, customOutputDirectory, findings);
    } else {
      this.logger.showUser(yellow(`  Diagnostics output directory not found, skipping: ${hieroOutputDirectory}`));
      // The report is written into this directory, so it has to exist.
      fs.mkdirSync(hieroOutputDirectory, {recursive: true});
    }

    // Sort once so the terminal summary and the written report share order and numbering.
    const orderedFindings: DiagnosticsFinding[] = this.sortFindingsBySeverity(findings);

    const reportPath: string = PathEx.join(hieroOutputDirectory, DiagnosticsAnalyzer.REPORT_FILE_NAME);
    this.logger.showUser(`Writing report to: ${reportPath}`);
    fs.writeFileSync(reportPath, this.renderDiagnosticsFindings(orderedFindings), 'utf8');

    this.showFindingsSummary(orderedFindings, reportPath);
  }

  /** Picks the directory that is searched for `*-log-config.zip` archives. */
  private resolveConsensusArchiveDirectory(customOutputDirectory: string, namespaceName: string | undefined): string {
    if (customOutputDirectory) {
      return path.resolve(customOutputDirectory);
    }
    if (namespaceName) {
      return PathEx.join(constants.SOLO_LOGS_DIR, namespaceName);
    }
    return constants.SOLO_LOGS_DIR;
  }

  /**
   * Prints the terminal summary: the first findings (already in severity
   * order) with a few evidence lines each, or a success message when there
   * are none.
   */
  private showFindingsSummary(orderedFindings: readonly DiagnosticsFinding[], reportPath: string): void {
    const reportFileName: string = DiagnosticsAnalyzer.REPORT_FILE_NAME;
    const maxFindings: number = DiagnosticsAnalyzer.MAX_TERMINAL_FINDINGS;

    if (orderedFindings.length === 0) {
      this.logger.showUser(green(`No common failure signatures detected. Report: ${reportPath}`));
      return;
    }

    this.logger.showUser(
      yellow(
        `Detected ${orderedFindings.length} potential issue(s) from diagnostics logs. Summary written to ${reportPath}`,
      ),
    );

    for (const [index, finding] of orderedFindings.slice(0, maxFindings).entries()) {
      this.logger.showUser(`${index + 1}. ${finding.title} [${finding.source}]`);

      // Stack traces need more context than single-line matches to be useful.
      const maxEvidenceLines: number = finding.category === 'log-exception' ? 8 : 4;
      for (const evidenceLine of finding.evidence.slice(0, maxEvidenceLines)) {
        this.logger.showUser(`   - ${evidenceLine}`);
      }
      if (finding.evidence.length > maxEvidenceLines) {
        this.logger.showUser(
          `   ... and ${finding.evidence.length - maxEvidenceLines} more evidence line(s) in ${reportFileName}`,
        );
      }
    }

    if (orderedFindings.length > maxFindings) {
      this.logger.showUser(
        `... and ${orderedFindings.length - maxFindings} more. See ${reportFileName} for details.`,
      );
    }
  }

  /**
   * Recursively scans `rootDirectory` for `*.describe.txt` files (one per pod)
   * and checks each for image-pull failures, OOM kills, and pod-readiness
   * problems.
   *
   * Detected errors:
   *  - `image-pull`    ErrImagePull / ImagePullBackOff / rate-limit / unexpected EOF
   *  - `oom`           OOMKilled / out of memory
   *  - `pod-readiness` Status != Running  OR  Ready: False
   */
  private analyzeDescribeFiles(rootDirectory: string, findings: DiagnosticsFinding[]): void {
    const describeFiles: string[] = this.collectFilesRecursively(rootDirectory, (filePath: string): boolean =>
      filePath.endsWith('.describe.txt'),
    );

    // Matches any image-pull error surfaced in `kubectl describe pod` output.
    // Covers:
    //   - ErrImagePull / ImagePullBackOff  (standard Kubernetes pull errors)
    //   - "Back-off pulling image"          (CRI back-off message in Events)
    //   - "failed to pull and unpack image" (containerd error)
    //   - "unexpected EOF"                  (truncated layer download)
    //   - toomanyrequests / rate limit exceeded / 429 Too Many Requests
    //     (Docker Hub and other registries throttle anonymous pulls)
    const imagePullPattern: RegExp =
      /ErrImagePull|ImagePullBackOff|Back-off pulling image|failed to pull and unpack image|unexpected EOF|toomanyrequests|rate limit exceeded|429 Too Many Requests/i;

    // Matches out-of-memory kills.
    // "OOMKilled" appears in the container's LastTerminationState and in Events.
    // "reason: OOMKilled" is the structured field in the container status JSON.
    const oomPattern: RegExp = /OOMKilled|out of memory|reason:\s*OOMKilled/i;

    this.logger.showUser(`  Found ${describeFiles.length} pod describe file(s)`);

    for (const describeFile of describeFiles) {
      const source: string = path.relative(rootDirectory, describeFile);
      this.logger.showUser(`  Reading: ${source}`);
      const content: string | undefined = this.tryReadTextFile(describeFile, 'describe file', source);
      if (content === undefined) {
        continue;
      }

      const podName: string = path.basename(describeFile, '.describe.txt');

      if (imagePullPattern.test(content)) {
        this.addDiagnosticsFinding(findings, {
          category: 'image-pull',
          title: `Image pull failure detected for pod ${podName}`,
          source,
          evidence: this.extractMatchSnippets(content, imagePullPattern, 8),
        });
      }

      if (oomPattern.test(content)) {
        this.addDiagnosticsFinding(findings, {
          category: 'oom',
          title: `OOM-related failure detected for pod ${podName}`,
          source,
          evidence: this.extractMatchSnippets(content, oomPattern, 6),
        });
      }

      // A pod is unhealthy if its top-level status is anything other than
      // "Running" or if any container is not ready.
      //
      // Two file formats are possible depending on how the describe file was
      // collected:
      //   - Text format (kubectl describe pod):  "Status: Pending"
      //                                          "Ready: False"
      //   - YAML format (kubectl get pod -o yaml): "phase: Pending"
      //                                            "ready: false"
      //
      // Both are matched so the check is format-agnostic.
      // Reason: / Message: / reason: / message: lines (case-insensitive) are
      // captured for additional context.
      const statusMatch: RegExpMatchArray | null = content.match(/^\s*(?:Status|phase):\s+([^\n]+)/m);
      const status: string = statusMatch?.[1]?.trim().replaceAll(/^"|"$/g, '') ?? '';
      const readyFalse: boolean = /^\s*[Rr]eady:\s+[Ff]alse\b/m.test(content);
      const notRunning: boolean = status.length > 0 && status !== constants.POD_PHASE_RUNNING;
      if (!notRunning && !readyFalse) {
        continue;
      }

      const evidence: string[] = [];
      if (status) {
        evidence.push(`Status: ${status}`);
      }
      if (readyFalse) {
        evidence.push('Ready: False');
      }
      evidence.push(...this.extractMatchSnippetsJoiningContinuations(content, /^\s*(Reason|Message):\s+/i, 8));

      this.addDiagnosticsFinding(findings, {
        category: 'pod-readiness',
        title: `Pod not ready/running: ${podName}`,
        source,
        evidence,
      });
    }
  }

  /**
   * Recursively scans `rootDirectory` for component pod log files and checks
   * each for application-level ERROR/FATAL lines (category: `app-error`).
   *
   * These are the raw container logs stored alongside the `*.describe.txt`
   * files.  Each file yields at most one finding carrying the first matching
   * lines (up to 8) as evidence.
   */
  private analyzePodLogFiles(rootDirectory: string, findings: DiagnosticsFinding[]): void {
    // Only scan logs for non-consensus components. Consensus node logs are
    // handled separately via the *-log-config.zip archives (which include
    // swirlds.log and hgcaa.log).  Broad *.log would match those files too
    // and produce duplicate / noisy findings.
    const componentLogPattern: RegExp = /[\\/](?:mirror|block|relay|explorer|solo-shared)[^/\\]*\.log$/i;
    const logFiles: string[] = this.collectFilesRecursively(rootDirectory, (filePath: string): boolean =>
      componentLogPattern.test(filePath),
    );

    const errorPattern: RegExp = /\b(?:ERROR|FATAL)\b/i;

    this.logger.showUser(`  Found ${logFiles.length} pod log file(s)`);

    for (const logFile of logFiles) {
      const relativePath: string = path.relative(rootDirectory, logFile);
      this.logger.showUser(`  Reading: ${relativePath}`);
      const content: string | undefined = this.tryReadTextFile(logFile, 'log file', relativePath);
      if (content === undefined) {
        continue;
      }

      // Strip the leading container-runtime timestamp (e.g. "2026-04-06T03:24:32.470558065Z ")
      // so the evidence shows the application log line itself.
      const strippedContent: string = content.replaceAll(/^\d{4}-\d{2}-\d{2}T[\d:.]+Z\s+/gm, '');
      if (!errorPattern.test(strippedContent)) {
        continue;
      }

      const podName: string = path.basename(logFile, '.log');
      this.addDiagnosticsFinding(findings, {
        category: 'app-error',
        title: `Application ERROR detected in pod log: ${podName}`,
        source: relativePath,
        evidence: this.extractMatchSnippets(strippedContent, errorPattern, 8),
      });
    }
  }

  /**
   * Searches for `solo.log` in `hieroOutputDirectory` (recursively) and, when
   * no custom output directory was specified, also checks the standard
   * `~/.solo/logs/solo.log` location.  ERROR entries are extracted and reported
   * as `app-error` findings.
   */
  private analyzeSoloLogFiles(
    hieroOutputDirectory: string,
    customOutputDirectory: string,
    findings: DiagnosticsFinding[],
  ): void {
    const soloLogFiles: string[] = this.collectFilesRecursively(
      hieroOutputDirectory,
      (filePath: string): boolean => path.basename(filePath) === 'solo.log',
    );

    // When using the default output path, the solo.log lives one level up at
    // ~/.solo/logs/solo.log — outside hieroOutputDirectory, so check it separately.
    if (!customOutputDirectory) {
      const defaultSoloLog: string = PathEx.join(constants.SOLO_LOGS_DIR, 'solo.log');
      if (fs.existsSync(defaultSoloLog) && !soloLogFiles.includes(defaultSoloLog)) {
        soloLogFiles.push(defaultSoloLog);
      }
    }

    this.logger.showUser(`  Found ${soloLogFiles.length} solo log file(s)`);

    // eslint-disable-next-line no-control-regex
    const ansiPattern: RegExp = new RegExp('\u001B\\[[0-9;]*m', 'g');
    const traceIdPattern: RegExp = /\s+\[traceId="[^"]*"\]/g;

    for (const soloLogFile of soloLogFiles) {
      const relativePath: string = path.relative(hieroOutputDirectory, soloLogFile);
      const sourceLabel: string = relativePath || path.basename(soloLogFile);
      this.logger.showUser(`  Reading: ${sourceLabel}`);
      const content: string | undefined = this.tryReadTextFile(soloLogFile, 'solo log', sourceLabel);
      if (content === undefined) {
        continue;
      }

      const cleanedContent: string = content.replaceAll(ansiPattern, '').replaceAll(traceIdPattern, '');
      if (!DiagnosticsAnalyzer.SOLO_LOG_ERROR_PATTERN.test(cleanedContent)) {
        continue;
      }

      this.addDiagnosticsFinding(findings, {
        category: 'app-error',
        title: 'ERROR detected in solo.log',
        source: sourceLabel,
        evidence: this.extractSoloLogErrorBlocks(cleanedContent, 3, DiagnosticsAnalyzer.MAX_EVIDENCE_LINES),
      });
    }
  }

  /**
   * Recursively scans `archiveRootDirectory` for `*-log-config.zip` archives
   * produced by `getNodeLogsAndConfigs()` and inspects two log files inside
   * each archive:
   *
   *  - `output/swirlds.log` — checked for absence of the `ACTIVE` platform
   *    status marker (category: `consensus-active`) and for exception blocks
   *    (category: `log-exception`).
   *  - `output/hgcaa.log`   — checked for exception blocks only
   *    (category: `log-exception`).
   *
   * Only the first exception block per log file is captured (up to 14 lines)
   * to keep the report readable.  Archives that cannot be opened are reported
   * with a warning and skipped.
   */
  private analyzeConsensusNodeArchives(archiveRootDirectory: string, findings: DiagnosticsFinding[]): void {
    const archiveFiles: string[] = this.collectFilesRecursively(archiveRootDirectory, (filePath: string): boolean =>
      filePath.endsWith('-log-config.zip'),
    );

    this.logger.showUser(`  Found ${archiveFiles.length} consensus log archive(s)`);

    for (const archiveFile of archiveFiles) {
      const archiveName: string = path.basename(archiveFile);
      this.logger.showUser(`  Unzipping: ${archiveName}`);

      let entries: AdmZip.IZipEntry[];
      try {
        // Listing the entries is kept inside the guard so a broken archive
        // index is reported the same way as an unreadable file.
        entries = new AdmZip(archiveFile, {readEntries: true}).getEntries();
      } catch (error) {
        this.logger.showUser(yellow(`  Unable to read archive ${archiveName}: ${this.describeError(error)}`));
        continue;
      }

      for (const entry of entries) {
        const logDefinition: ConsensusLogDefinition | undefined = this.findConsensusLogDefinition(entry.entryName);
        if (logDefinition) {
          this.analyzeConsensusLogEntry(archiveName, entry, logDefinition, findings);
        }
      }
    }
  }

  /** Returns the log definition whose suffix matches `entryName`, if any. */
  private findConsensusLogDefinition(entryName: string): ConsensusLogDefinition | undefined {
    return DiagnosticsAnalyzer.CONSENSUS_LOG_DEFINITIONS.find((logDefinition: ConsensusLogDefinition): boolean =>
      entryName.endsWith(logDefinition.entrySuffix),
    );
  }

  /**
   * Decodes one archive entry as UTF-8 text and runs the checks configured in
   * `logDefinition` against it.  An entry that cannot be extracted is reported
   * with a warning and skipped so the remaining entries are still analyzed.
   */
  private analyzeConsensusLogEntry(
    archiveName: string,
    entry: AdmZip.IZipEntry,
    logDefinition: ConsensusLogDefinition,
    findings: DiagnosticsFinding[],
  ): void {
    this.logger.showUser(`    Reading entry: ${entry.entryName}`);
    const source: string = `${archiveName}:${entry.entryName}`;

    let content: string;
    try {
      content = entry.getData().toString('utf8');
    } catch (error) {
      this.logger.showUser(yellow(`    Unable to read entry ${source}: ${this.describeError(error)}`));
      return;
    }

    if (logDefinition.checkConsensusActive) {
      this.analyzeConsensusActiveStatus(content, source, findings);
    }
    this.analyzeExceptionBlocks(logDefinition.displayName, content, source, findings);
  }

  /**
   * A healthy consensus node transitions through STARTING_UP → OBSERVING →
   * REPLAYING_EVENTS → ACTIVE. If `ACTIVE` never appears in swirlds.log,
   * the node likely stalled before becoming ready for transactions.
   */
  private analyzeConsensusActiveStatus(content: string, source: string, findings: DiagnosticsFinding[]): void {
    if (/\bACTIVE\b/.test(content)) {
      return;
    }

    const evidence: string[] = this.extractMatchSnippets(
      content,
      /PlatformStatus|status|STARTING_UP|OBSERVING|REPLAYING_EVENTS|FREEZING|ACTIVE/i,
      8,
    );
    if (evidence.length === 0) {
      evidence.push('No ACTIVE status marker found in swirlds.log');
    }

    this.addDiagnosticsFinding(findings, {
      category: 'consensus-active',
      title: 'Consensus node may not have reached ACTIVE status',
      source,
      evidence,
    });
  }

  /**
   * Captures the first exception/stack-trace block from a consensus log file
   * and records it as a `log-exception` finding.
   */
  private analyzeExceptionBlocks(
    logDisplayName: ConsensusLogDefinition['displayName'],
    content: string,
    source: string,
    findings: DiagnosticsFinding[],
  ): void {
    const [firstBlock]: string[] = this.extractExceptionBlocks(content, 1, DiagnosticsAnalyzer.MAX_EVIDENCE_LINES);
    if (firstBlock === undefined) {
      return;
    }

    this.addDiagnosticsFinding(findings, {
      category: 'log-exception',
      title: `Exception detected in ${logDisplayName}`,
      source,
      evidence: firstBlock.split('\n').filter((line: string): boolean => line.trim().length > 0),
    });
  }

  /**
   * Adds `finding` to `findings` unless an identical entry (same category,
   * title, and source) already exists.  Evidence lines are deduplicated,
   * blank lines are dropped, and the result is capped at 14 entries to keep
   * the report compact.
   */
  private addDiagnosticsFinding(findings: DiagnosticsFinding[], finding: DiagnosticsFinding): void {
    const alreadyReported: boolean = findings.some(
      (existing: DiagnosticsFinding): boolean =>
        existing.category === finding.category &&
        existing.title === finding.title &&
        existing.source === finding.source,
    );
    if (alreadyReported) {
      return;
    }

    findings.push({
      ...finding,
      evidence: [...new Set(finding.evidence)]
        .filter((line: string): boolean => line.trim().length > 0)
        .slice(0, DiagnosticsAnalyzer.MAX_EVIDENCE_LINES),
    });
  }

  /**
   * Walks `rootDirectory` recursively and returns all file paths for which
   * `matcher` returns `true`.
   *
   * Entries are visited in name order so the result does not depend on the
   * file system's listing order.  A directory that cannot be listed is
   * reported with a warning and skipped instead of aborting the whole scan.
   */
  private collectFilesRecursively(rootDirectory: string, matcher: (filePath: string) => boolean): string[] {
    const files: string[] = [];

    const visit: (directory: string) => void = (directory: string): void => {
      let entries: fs.Dirent[];
      try {
        entries = fs.readdirSync(directory, {withFileTypes: true});
      } catch (error) {
        this.logger.showUser(yellow(`  Unable to read directory ${directory}: ${this.describeError(error)}`));
        return;
      }

      entries.sort((left: fs.Dirent, right: fs.Dirent): number => {
        if (left.name < right.name) {
          return -1;
        }
        return left.name > right.name ? 1 : 0;
      });

      for (const entry of entries) {
        const entryPath: string = path.join(directory, entry.name);
        if (entry.isDirectory()) {
          visit(entryPath);
        } else if (entry.isFile() && matcher(entryPath)) {
          files.push(entryPath);
        }
      }
    };

    visit(rootDirectory);
    return files;
  }

  /**
   * Reads `filePath` as UTF-8 text.  On failure a warning of the form
   * `Unable to read <description> <displayPath>: <reason>` is shown and
   * `undefined` is returned so the caller can skip the file.
   */
  private tryReadTextFile(filePath: string, description: string, displayPath: string): string | undefined {
    try {
      return fs.readFileSync(filePath, 'utf8');
    } catch (error) {
      this.logger.showUser(yellow(`  Unable to read ${description} ${displayPath}: ${this.describeError(error)}`));
      return undefined;
    }
  }

  /** Converts a caught value into a printable message. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Returns a copy of `findings` ordered by category severity.  The sort is
   * stable, so findings of the same category keep their detection order.
   */
  private sortFindingsBySeverity(findings: readonly DiagnosticsFinding[]): DiagnosticsFinding[] {
    const severityOrder: Readonly<Record<DiagnosticsFindingCategory, number>> = DiagnosticsAnalyzer.SEVERITY_ORDER;
    return [...findings].sort(
      (left: DiagnosticsFinding, right: DiagnosticsFinding): number =>
        severityOrder[left.category] - severityOrder[right.category],
    );
  }

  /**
   * Extracts up to `maxBlocks` ERROR blocks from a solo.log file.
   *
   * Each block starts on a line matching `] ERROR:` and collects the
   * following non-blank lines (typically the Pino `err:` object dump) until
   * a new log entry — any line starting with `[HH:MM:SS.mmm]` — or the end of
   * the file is reached.  Blank lines are skipped.  Each block is capped at
   * `maxLinesPerBlock` lines.
   *
   * Evidence lines are returned flat (one string per line) in
   * `"line <N>: <content>"` format so they render consistently with other
   * findings.
   */
  private extractSoloLogErrorBlocks(content: string, maxBlocks: number, maxLinesPerBlock: number): string[] {
    const lines: string[] = content.split(/\r?\n/);
    // New Pino log entries start with a bracketed timestamp, e.g. "[17:25:23.788]"
    const newEntryPattern: RegExp = /^\[\d{2}:\d{2}:\d{2}\.\d{3}]/;
    const evidence: string[] = [];
    let blocksCollected: number = 0;

    for (let index: number = 0; index < lines.length && blocksCollected < maxBlocks; index++) {
      if (!DiagnosticsAnalyzer.SOLO_LOG_ERROR_PATTERN.test(lines[index])) {
        continue;
      }

      const blockLines: string[] = [`line ${index + 1}: ${lines[index].trim()}`];
      let next: number = index + 1;
      while (next < lines.length && blockLines.length < maxLinesPerBlock) {
        const nextLine: string = lines[next];
        if (newEntryPattern.test(nextLine)) {
          break;
        }
        if (nextLine.trim().length > 0) {
          blockLines.push(`line ${next + 1}: ${nextLine.trim()}`);
        }
        next++;
      }

      evidence.push(...blockLines);
      blocksCollected++;
      // Resume after the consumed block; the loop increment moves to `next`.
      index = next - 1;
    }

    return evidence;
  }

  /**
   * Returns a copy of `pattern` without the global (`g`) flag so that
   * repeated `test()` calls are not affected by `lastIndex`.
   */
  private createLineMatcher(pattern: RegExp): RegExp {
    return new RegExp(pattern.source, pattern.flags.replaceAll('g', ''));
  }

  /** Number of leading whitespace characters in `line`. */
  private leadingWhitespaceLength(line: string): number {
    return /^\s*/.exec(line)?.[0].length ?? 0;
  }

  /**
   * Returns up to `maxMatches` lines from `content` that match `pattern`,
   * formatted as `"line <N>: <trimmed line>"`.
   *
   * The global (`g`) flag is stripped before matching so the RegExp lastIndex
   * does not interfere with repeated calls against the same pattern instance.
   */
  private extractMatchSnippets(content: string, pattern: RegExp, maxMatches: number): string[] {
    const snippets: string[] = [];
    const matcher: RegExp = this.createLineMatcher(pattern);

    for (const [index, line] of content.split(/\r?\n/).entries()) {
      if (!matcher.test(line)) {
        continue;
      }
      snippets.push(`line ${index + 1}: ${line.trim()}`);
      if (snippets.length >= maxMatches) {
        break;
      }
    }

    return snippets;
  }

  /**
   * Like {@link extractMatchSnippets} but joins indented continuation lines
   * (YAML/kubectl-describe multi-line values) into a single evidence entry.
   *
   * When a matching key line is found, any immediately following non-blank
   * lines whose leading whitespace is strictly greater than the key line's
   * indentation are appended (space-separated) before the snippet is
   * recorded.  This collapses a multi-line `message:` value into one readable
   * line instead of surfacing only the truncated first line.  Absorbed lines
   * are not examined again, so a continuation line that happens to match
   * `pattern` is not reported a second time.
   */
  private extractMatchSnippetsJoiningContinuations(content: string, pattern: RegExp, maxMatches: number): string[] {
    const snippets: string[] = [];
    const lines: string[] = content.split(/\r?\n/);
    const matcher: RegExp = this.createLineMatcher(pattern);

    for (let index: number = 0; index < lines.length && snippets.length < maxMatches; index++) {
      const keyLine: string = lines[index];
      if (!matcher.test(keyLine)) {
        continue;
      }

      const keyIndent: number = this.leadingWhitespaceLength(keyLine);
      const parts: string[] = [keyLine.trim()];

      // Absorb continuation lines that are indented more than the key line.
      let next: number = index + 1;
      while (next < lines.length) {
        const candidate: string = lines[next];
        if (candidate.trim().length === 0 || this.leadingWhitespaceLength(candidate) <= keyIndent) {
          break;
        }
        parts.push(candidate.trim());
        next++;
      }

      snippets.push(`line ${index + 1}: ${parts.join(' ')}`);
      // Skip the absorbed lines; the loop increment moves to `next`.
      index = next - 1;
    }

    return snippets;
  }

  /**
   * Extracts up to `maxBlocks` exception/stack-trace blocks from `content`.
   *
   * A block starts on any line matching `Exception`, `Error`, or `Caused by:`
   * and continues as long as subsequent lines are stack frames (`at …`),
   * chained causes (`Caused by:` / `Suppressed:`), truncation markers
   * (`… N more`), or further exception-type lines.  Each block is capped at
   * `maxLinesPerBlock` lines.
   *
   * A candidate is only reported when its governing log entry is at
   * ERROR/FATAL/SEVERE level.  The governing entry is the candidate line
   * itself when it starts with a timestamp, otherwise the nearest timestamped
   * line within the 5 preceding lines.  Candidates with no timestamped entry
   * in reach are reported.
   */
  private extractExceptionBlocks(content: string, maxBlocks: number, maxLinesPerBlock: number): string[] {
    const lines: string[] = content.split(/\r?\n/);
    const blocks: string[] = [];
    const timestampPattern: RegExp = /^\d{4}-\d{2}-\d{2}[ T]\d{2}:\d{2}:\d{2}/;
    const exceptionTypeLinePattern: RegExp =
      /^\s*(?:[a-z_][A-Za-z0-9_$]*\.)*[A-Z][A-Za-z0-9_$]*(?:Exception|Error|Throwable)(?::|\b)/;
    const startPattern: RegExp = new RegExp(
      String.raw`${exceptionTypeLinePattern.source}|\b(?:Exception|Error)\b|^\s*Caused by:`,
    );

    // Matches only the severity levels that indicate a real error.
    const errorLevelPattern: RegExp = /\b(?:ERROR|FATAL|SEVERE)\b/i;

    const isBlockContinuation: (line: string) => boolean = (line: string): boolean =>
      /^\s+at\s+/.test(line) ||
      /^\s*Caused by:/.test(line) ||
      /^\s*Suppressed:/.test(line) ||
      /^\s*\.\.\.\s+\d+\s+more/.test(line) ||
      exceptionTypeLinePattern.test(line);

    for (let index: number = 0; index < lines.length && blocks.length < maxBlocks; index++) {
      const candidate: string = lines[index];
      if (!startPattern.test(candidate)) {
        continue;
      }

      // Determine the log entry whose severity governs this candidate.  A
      // timestamped candidate is its own entry; using the previous entry's
      // level for it would hide real errors and report harmless INFO lines.
      // Otherwise look back up to 5 lines: stack traces following a
      // WARN/INFO/DEBUG line are expected (e.g. FileAlreadyExistsException on
      // a WARN archive attempt) and must not be reported as findings.
      const candidateIsLogEntry: boolean = timestampPattern.test(candidate);
      let governingEntry: string | undefined = candidateIsLogEntry ? candidate : undefined;
      for (let scan: number = index - 1; governingEntry === undefined && scan >= 0 && scan >= index - 5; scan--) {
        if (timestampPattern.test(lines[scan])) {
          governingEntry = lines[scan];
        }
      }
      if (governingEntry !== undefined && !errorLevelPattern.test(governingEntry)) {
        continue;
      }

      const blockLines: string[] = [candidate];
      // In swirlds/hgcaa logs, the actual throwable class line can follow a
      // timestamped ERROR marker line. Include that marker line as context.
      // A candidate that is itself a log entry needs no such context; the
      // previous line would be an unrelated entry.
      if (!candidateIsLogEntry && index > 0 && blockLines.length < maxLinesPerBlock) {
        const previousLine: string = lines[index - 1];
        const previousIsErrorMarker: boolean =
          /\bERROR\s+EXCEPTION\b/i.test(previousLine) ||
          (timestampPattern.test(previousLine) && errorLevelPattern.test(previousLine));
        if (previousIsErrorMarker && previousLine !== candidate) {
          blockLines.unshift(previousLine);
        }
      }

      let next: number = index + 1;
      while (next < lines.length && blockLines.length < maxLinesPerBlock) {
        const line: string = lines[next];
        if (line.trim().length === 0 || timestampPattern.test(line) || !isBlockContinuation(line)) {
          break;
        }
        blockLines.push(line);
        next++;
      }

      blocks.push(blockLines.join('\n'));
      // Resume after the consumed block; the loop increment moves to `next`.
      index = next - 1;
    }

    return blocks;
  }

  /**
   * Renders all findings into a human-readable plain-text report, sorted by
   * severity (image-pull → oom → pod-readiness → consensus-active →
   * log-exception → app-error).  Returns the report as a string ready to be
   * written to `diagnostics-analysis.txt`.
   */
  private renderDiagnosticsFindings(findings: DiagnosticsFinding[]): string {
    const lines: string[] = ['Solo Diagnostics Analysis Report', `Generated: ${new Date().toISOString()}`, ''];

    if (findings.length === 0) {
      lines.push('No common failure signatures were detected.');
      return lines.join('\n');
    }

    const orderedFindings: DiagnosticsFinding[] = this.sortFindingsBySeverity(findings);
    lines.push(`Detected ${orderedFindings.length} potential issue(s):`, '');

    for (const [index, finding] of orderedFindings.entries()) {
      const label: string = DiagnosticsAnalyzer.CATEGORY_LABELS[finding.category];
      lines.push(`${index + 1}. [${label}] ${finding.title}`, `   Source: ${finding.source}`);
      if (finding.evidence.length > 0) {
        lines.push('   Evidence:');
        for (const evidenceLine of finding.evidence) {
          lines.push(`   - ${evidenceLine}`);
        }
      }
      lines.push('');
    }

    return lines.join('\n');
  }
}
