import * as depGraph from "@snyk/dep-graph";
import * as Debug from "debug";
import { eventLoopSpinner } from "event-loop-spinner";
// NOTE: Paths will always be normalized to POSIX even on Windows.
// This makes it easier to ignore differences between Linux and Windows.
import { posix as path } from "path";
import * as varint from "varint";

import { DEP_GRAPH_TYPE } from "./";
import { GoModule } from "./go-module";
import { LineTable } from "./pclntab";
import { Elf, ElfProgram } from "./types";

const debug = Debug("snyk");

/**
 * GoBinary: Parser for Go compiled binaries
 *
 * This class extracts dependency information from Go binaries by reading ELF sections.
 * It implements two scanning strategies depending on binary characteristics:
 * - If .gopclntab exists: Extract source files → Map to packages → Report packages
 * - If .gopclntab missing: Extract modules from .go.buildinfo → Report all modules
 *
 * Binary Types:
 *
 * 1. Normal Go Binaries (with .gopclntab):
 *    - Built with standard flags
 *    - Contains .gopclntab (Go Program Counter Line Table) section
 *    - .gopclntab maps program counter addresses to source files
 *
 * 2. Stripped Go Binaries (without .gopclntab):
 *    - Built with -ldflags='-s -w' flag
 *    - Removes debug symbols, symbol tables (.symtab, .strtab), and .gopclntab
 *
 * 3. CGo Go Binaries:
 *    - Built with CGO_ENABLED=1 (calls C code)
 *    - May or may not contain .gopclntab depending on build configuration
 *
 * ELF Sections Used:
 * - .go.buildinfo: Module names, versions, and build information (always present)
 * - .gopclntab: Source file to package mapping (missing in stripped/some CGo binaries)
 *
 */
export class GoBinary {
  public name: string;
  public modules: GoModule[];
  public goVersion: string;
  private hasPclnTab: boolean;

  /**
   * Reads the binary name, module list and Go version from the build info
   * and, when a `.gopclntab` section is present, assigns the source files it
   * lists to packages of those modules.
   *
   * @param goElfBinary parsed ELF representation of the Go binary.
   * @throws Error propagated from `extractModuleInformation` when the build
   * info cannot be read. Failures while processing `.gopclntab` are only
   * logged.
   */
  constructor(goElfBinary: Elf) {
    [this.name, this.modules, this.goVersion] =
      extractModuleInformation(goElfBinary);

    const pclnTab = goElfBinary.body.sections.find(
      (section) => section.name === ".gopclntab",
    );

    // Track whether pclnTab exists to determine reporting strategy
    this.hasPclnTab = pclnTab !== undefined;

    // Stripped binaries (built with -ldflags='-s -w') and some CGo binaries
    // do not contain .gopclntab, which means we cannot detect package-level
    // dependencies. In this case, we fall back to module-level reporting from
    // .go.buildinfo, as remediation is performed at the module level anyway.
    if (pclnTab !== undefined) {
      try {
        this.matchFilesToModules(new LineTable(pclnTab.data).go12MapFiles());
      } catch (err) {
        // Best effort: packages matched before the failure stay registered on
        // their modules. Narrow the caught value before reading `stack`, as
        // anything can be thrown.
        const detail = err instanceof Error ? err.stack || err.message : err;
        debug(`Failed to parse .gopclntab in ${this.name}`, detail);
      }
    }
  }

  /**
   * Builds the dependency graph for this binary.
   *
   * Every reported dependency is connected directly to the root node:
   * - modules with known packages contribute one node per package,
   * - when the binary has no `.gopclntab`, modules are reported by name,
   * - a `stdlib` node is added when a Go version was parsed.
   *
   * @returns the built dependency graph.
   */
  public async depGraph(): Promise<depGraph.DepGraph> {
    const goModulesDepGraph = new depGraph.DepGraphBuilder(
      { name: DEP_GRAPH_TYPE },
      { name: this.name },
    );

    for (const module of this.modules) {
      // Yield to the event loop between modules when it is starving.
      if (eventLoopSpinner.isStarving()) {
        await eventLoopSpinner.spin();
      }

      // If we have package-level information (from pclntab), use it
      if (module.packages.length > 0) {
        for (const pkg of module.packages) {
          const nodeId = `${pkg}@${module.version}`;
          goModulesDepGraph.addPkgNode(
            { name: pkg, version: module.version },
            nodeId,
          );
          goModulesDepGraph.connectDep(goModulesDepGraph.rootNodeId, nodeId);
        }
      } else if (!this.hasPclnTab) {
        // ONLY if .gopclntab is missing (stripped/CGo binaries), report module-level
        // dependencies from .go.buildinfo.
        //
        // Note: .go.buildinfo contains ALL modules from the build graph, including
        // modules required only for version resolution (transitive dependencies with
        // no code actually compiled into the binary). Without .gopclntab, we cannot
        // distinguish these from modules with actual code present.
        const nodeId = `${module.name}@${module.version}`;
        goModulesDepGraph.addPkgNode(
          { name: module.name, version: module.version },
          nodeId,
        );
        goModulesDepGraph.connectDep(goModulesDepGraph.rootNodeId, nodeId);
      }
      // else: pclnTab exists but module has no packages - don't report anything
    }

    if (this.goVersion) {
      const stdlibNodeId = `stdlib@${this.goVersion}`;
      goModulesDepGraph.addPkgNode(
        { name: "stdlib", version: this.goVersion },
        stdlibNodeId,
      );
      goModulesDepGraph.connectDep(goModulesDepGraph.rootNodeId, stdlibNodeId);
    } else {
      debug(
        `Skipping stdlib node for ${this.name}: could not parse Go version`,
      );
    }

    return goModulesDepGraph.build();
  }

  /**
   * Goes through all files, derives the package each file belongs to and
   * records that package on the matching module in `this.modules`.
   *
   * @param files source file names as listed in the binary's line table.
   * @throws GoFileNameError when a file name contains the matched module
   * name more than once and therefore cannot be split unambiguously.
   */
  public matchFilesToModules(files: string[]): void {
    const normalizedFiles = files.map((file) => path.normalize(file));
    const { modCachePath, vendorPath } = determinePaths(
      this.modules,
      normalizedFiles,
    );
    for (const fileName of normalizedFiles) {
      if (fileName === "<autogenerated>") {
        continue;
      }

      // Vendored files are laid out by bare module name, everything else by
      // the module's full name.
      let moduleName = (mod: GoModule): string => mod.fullName();
      let pkgFile = "";
      if (vendorPath && fileName.startsWith(vendorPath)) {
        moduleName = (mod: GoModule): string => mod.name;
        pkgFile = trimPrefix(fileName, vendorPath);
      } else if (modCachePath && fileName.startsWith(modCachePath)) {
        moduleName = (mod: GoModule): string => mod.fullName();
        pkgFile = trimPrefix(fileName, modCachePath);
      } else if (!vendorPath && !modCachePath) {
        // is trimmed
        pkgFile = fileName;
      } else {
        // skip file, probably a file from the Go source.
        continue;
      }

      // Try to find the module that matches our file name, and if found,
      // extract the package name out of it.
      // Go source files will not be matched by any module, so they will be
      // skipped automatically.
      for (const module of this.modules) {
        const modFullName = moduleName(module);
        if (!pkgFile.startsWith(modFullName)) {
          continue;
        }

        // A plain prefix match is not sufficient: "github.com/my/pkg" is also
        // a string prefix of "github.com/my/pkg2/a.go", which belongs to a
        // different module. Only accept the match when the module name ends
        // on a path boundary, otherwise the file would be reported as a
        // package of the wrong module (and with the wrong version).
        const boundary = pkgFile.charAt(modFullName.length);
        if (boundary !== "" && boundary !== path.sep) {
          continue;
        }

        // For example, the filename "github.com/my/pkg@v0.0.1/a/a.go" will be
        // split into "" and "/a/a.go" around the module's full name. We then
        // take the directory of the remainder and put the normalized module
        // name (without the version) in front. This will result in the
        // package name "github.com/my/pkg/a".
        const parts = pkgFile.split(modFullName);
        if (parts.length !== 2 || parts[0] !== "") {
          throw new GoFileNameError(pkgFile, modFullName);
        }

        // for files in the "root" of a module
        // (github.com/my/pkg@v0.0.1/a.go), the path.parse expression returns
        // just a slash. This would result in a package name with a trailing
        // slash, which is incorrect.
        let dirName = path.parse(parts[1]).dir;
        if (dirName === path.sep) {
          dirName = "";
        }

        const pkgName = module.name + dirName;
        if (!module.packages.includes(pkgName)) {
          module.packages.push(pkgName);
        }
      }
    }
  }
}

/**
 * Error raised when a Go source file name cannot be split unambiguously
 * around the module name it was matched against.
 */
export class GoFileNameError extends Error {
  /** File name that could not be matched. */
  public readonly fileName: string;
  /** Module name the file was matched against. */
  public readonly moduleName: string;

  /**
   * @param fileName file name that could not be matched.
   * @param moduleName module name the file was matched against.
   */
  constructor(fileName: string, moduleName: string) {
    // Hand the message to the Error constructor instead of assigning it
    // afterwards: engines that capture `stack` at construction time (such as
    // V8) otherwise produce a stack without the message, and callers that log
    // `err.stack` lose the file and module names.
    super(`Failed to match Go file "${fileName}" to module "${moduleName}"`);
    // Keep `instanceof GoFileNameError` working when compiled to ES5, where
    // subclassing Error resets the prototype chain.
    Object.setPrototypeOf(this, new.target.prototype);
    this.name = "GoFileNameError";
    this.fileName = fileName;
    this.moduleName = moduleName;
  }
}

/**
 * Converts a raw Go version string (e.g. "go1.21.0") into a bare
 * three-segment version (e.g. "1.21.0").
 *
 * Only release versions of the form "goX.Y" or "goX.Y.Z" are accepted.
 * RC, beta and devel builds are rejected because vulnerabilities cannot be
 * matched accurately against pre-release builds.
 *
 * @param rawVersion version string as read from the binary.
 * @returns the cleaned version, or an empty string if the input is not a
 * release version.
 */
export function parseGoVersion(rawVersion: string): string {
  // Release versions only ("go1.21", "go1.21.5"); anything with a suffix such
  // as "go1.21rc1" or "go1.22beta2" does not match.
  const releasePattern = /^go(\d+\.\d+(?:\.\d+)?)$/;
  const captured = releasePattern.exec(rawVersion);
  if (captured === null) {
    return "";
  }

  const release = captured[1];
  // @snyk/vuln uses node's semver library, which requires three segments, so
  // pad a two-segment version ("1.19" → "1.19.0").
  const segmentCount = release.split(".").length;
  if (segmentCount >= 3) {
    return release;
  }
  return `${release}.0`;
}

/**
 * Extracts the binary's name, its module dependencies and the Go version
 * from the build information embedded in the binary.
 *
 * The module info is a newline-separated list of tab-separated fields. The
 * first line is the path directive and the second the main module line;
 * `dep` lines that follow add a module and `=>` lines replace the name and
 * version of the module added last.
 *
 * @param binary parsed ELF binary to inspect.
 * @returns a tuple of the binary name, the dependency modules and the parsed
 * Go version (empty string if it is not a release version).
 * @throws Error if the binary contains no module info, or any error raised
 * by `readRawBuildInfo`.
 */
export function extractModuleInformation(
  binary: Elf,
): [name: string, deps: GoModule[], goVersion: string] {
  const { goVersion: rawGoVersion, modInfo } = readRawBuildInfo(binary);
  if (!modInfo) {
    throw Error("binary contains empty module info");
  }

  const goVersion = parseGoVersion(rawGoVersion);

  // Strip every carriage return (a string pattern would only replace the
  // first one). The main module line defaults to "" so that module info
  // consisting of a single line does not crash below.
  const [pathDirective, mainModuleLine = "", ...versionsLines] = modInfo
    .replace(/\r/g, "")
    .split("\n");
  const mainModuleFields = mainModuleLine.split("\t");
  let name = mainModuleFields[1];
  if (mainModuleFields[0] !== "mod") {
    // If the binary has no mod directive, it is a binary from the Go
    // distribution, like the "go" command, "vet", "gofmt" or others. In that
    // case, we use "go-distribution@" plus the path directive ("cmd/vet" for
    // example) as the name.  Using the "@" ensures that customers cannot create
    // name-clashes with these as "@" is an invalid character in Go modules.
    name = "go-distribution@" + pathDirective.split("\t")[1];
  }

  const modules: GoModule[] = [];
  for (const versionLine of versionsLines) {
    const [depType, depName, depVersion] = versionLine.split("\t");
    if (!depName || !depVersion) {
      continue;
    }

    if (depType === "dep") {
      modules.push(new GoModule(depName, depVersion));
    } else if (depType === "=>") {
      // we've found a replace directive. These are always for the previous
      // line/ module, so we simply need to replace the last module we added.
      // A replace directive that is not preceded by any dep line has nothing
      // to replace here and is ignored.
      const replaced = modules[modules.length - 1];
      if (replaced === undefined) {
        continue;
      }
      replaced.name = depName;
      replaced.version = depVersion;
    }
  }

  return [name, modules, goVersion];
}

// Source
// https://cs.opensource.google/go/go/+/refs/tags/go1.18.5:src/debug/buildinfo/buildinfo.go;l=142
/**
 * Raw build information as returned by `readRawBuildInfo`: the Go version
 * and the module version information found in the executable binary.
 */
export interface RawBuildInfo {
  /** Go version string as stored in the binary, before `parseGoVersion`. */
  goVersion: string;
  /**
   * Module information text. `extractModuleInformation` reads it as
   * newline-separated lines of tab-separated fields.
   */
  modInfo: string;
}

/**
 * Finds the Go build info blob in the executable binary and returns the Go
 * version and module version information stored in it.
 *
 * Ported from Go's debug/buildinfo package:
 * https://cs.opensource.google/go/go/+/refs/tags/go1.18.5:src/debug/buildinfo/buildinfo.go;l=142
 *
 * @param binary parsed ELF binary to inspect.
 * @returns the raw Go version string and the module info text.
 * @throws Error "not a Go executable" if no build info blob is found,
 * "no version found in go binary" if the version string cannot be read, and
 * "binary is not built with go module support" if the module info does not
 * have the expected framing.
 */
export function readRawBuildInfo(binary: Elf): RawBuildInfo {
  const buildInfoMagic = "\xff Go buildinf:";
  // Read the first 64kB of dataAddr to find the build info blob.
  // On some platforms, the blob will be in its own section, and DataStart
  // returns the address of that section. On others, it's somewhere in the
  // data segment; the linker puts it near the beginning.
  const dataAddr = dataStart(binary);
  let data =
    readData(binary.body.programs, dataAddr, 64 * 1024) || Buffer.from([]);

  const buildInfoAlign = 16;
  const buildInfoSize = 32;

  while (true) {
    // Search the bytes directly; "binary" maps each character of the magic
    // to a single byte.
    const i = data.indexOf(buildInfoMagic, 0, "binary");
    if (i < 0 || data.length - i < buildInfoSize) {
      throw Error("not a Go executable");
    }
    if (i % buildInfoAlign === 0) {
      data = data.subarray(i);
      break;
    }
    // The magic was found at an unaligned offset, so it is not the blob.
    // Continue at the next multiple of buildInfoAlign. The mask has to clear
    // all low bits (`~(align - 1)`); masking with `~align` clears a single
    // bit only, which can yield offset 0 and never make progress.
    data = data.subarray((i + buildInfoAlign - 1) & ~(buildInfoAlign - 1));
  }

  // Decode the blob.
  // The first 14 bytes are buildInfoMagic.
  // The next two bytes indicate pointer size in bytes (4 or 8) and endianness
  // (0 for little, 1 for big).
  // Two virtual addresses to Go strings follow that: runtime.buildVersion,
  // and runtime.modinfo.
  // On 32-bit platforms, the last 8 bytes are unused.
  // If the endianness has the 2 bit set, then the pointers are zero
  // and the 32-byte header is followed by varint-prefixed string data
  // for the two string values we care about.
  const ptrSize = data[14];
  if ((data[15] & 2) !== 0) {
    // Inline format: both strings are returned as decoded, without the
    // trimming that the pointer format below applies to the module info.
    data = data.subarray(32);
    const [goVersion, rest] = decodeString(data);
    const [mod] = decodeString(rest);
    return { goVersion, modInfo: mod };
  } else {
    const bigEndian = data[15] !== 0;

    // Any pointer size other than 4 is read as a 64-bit pointer.
    let readPtr: ReadPtrFunc;
    if (ptrSize === 4) {
      if (bigEndian) {
        readPtr = (buffer) => buffer.readUInt32BE(0);
      } else {
        readPtr = (buffer) => buffer.readUInt32LE(0);
      }
    } else {
      if (bigEndian) {
        readPtr = (buffer) => Number(buffer.readBigUInt64BE());
      } else {
        readPtr = (buffer) => Number(buffer.readBigUInt64LE());
      }
    }

    // The build info blob left by the linker is identified by
    // a 16-byte header, consisting of buildInfoMagic (14 bytes),
    // the binary's pointer size (1 byte),
    // and whether the binary is big endian (1 byte).
    // Now we attempt to read info after metadata.
    // From 16th byte to 16th + ptrSize there is a header that points
    // to go version
    const version: string = readString(
      binary,
      ptrSize,
      readPtr,
      readPtr(data.slice(16, 16 + ptrSize)),
    );

    if (version === "") {
      throw Error("no version found in go binary");
    }

    // Go version header was right after metadata.
    // Modules header right after go version
    // Read next `ptrSize` bytes, this point to the
    // place where modules info is stored
    const mod: string = readString(
      binary,
      ptrSize,
      readPtr,
      readPtr(data.slice(16 + ptrSize, 16 + 2 * ptrSize)),
    );

    // This verifies that what we got are actually go modules:
    // the module info is framed by 16 bytes at the start and 16 bytes at the
    // end, with a newline right before the trailing 16 bytes. The framing is
    // removed from the returned value.
    // Mirrors go version source code
    if (mod.length >= 33 && mod[mod.length - 17] === "\n") {
      return { goVersion: version, modInfo: mod.slice(16, mod.length - 16) };
    } else {
      throw Error("binary is not built with go module support");
    }
  }
}

/**
 * Decodes a varint-length-prefixed string from the start of `data`.
 *
 * @param data buffer starting with the varint length prefix.
 * @returns the decoded string and the buffer following it. If the prefix
 * size is not positive or the encoded length does not fit in the remaining
 * data, an empty string and an empty buffer are returned.
 */
function decodeString(data: Buffer): [string, Buffer] {
  const length = varint.decode(data);
  // Number of bytes the length prefix itself occupied.
  const prefixBytes = varint.decode.bytes;

  if (prefixBytes <= 0 || length >= data.length - prefixBytes) {
    return ["", Buffer.from([])];
  }

  const end = length + prefixBytes;
  const text = data.subarray(prefixBytes, end).toString("binary");
  const remainder = data.subarray(end);
  return [text, remainder];
}

// Source
// https://github.com/golang/go/blob/46f99ce7ea97d11b0a1a079da8dda0f51df2a2d2/src/cmd/go/internal/version/exe.go#L105
/**
 * Determines the address at which the search for module version data
 * starts.
 *
 * @param binary parsed ELF binary to inspect.
 * @returns the address of the `.go.buildinfo` section if there is one,
 * otherwise the virtual address of the first "load" program flagged as
 * writable, otherwise 0.
 */
function dataStart(binary: Elf): number {
  const { sections, programs } = binary.body;

  const buildInfoSection = sections.find(
    (candidate) => candidate.name === ".go.buildinfo",
  );
  if (buildInfoSection !== undefined) {
    return buildInfoSection.addr;
  }

  const writableLoad = programs.find(
    (candidate) => candidate.type === "load" && candidate.flags.w === true,
  );
  return writableLoad !== undefined ? writableLoad.vaddr : 0;
}

// Source
// https://github.com/golang/go/blob/46f99ce7ea97d11b0a1a079da8dda0f51df2a2d2/src/cmd/go/internal/version/exe.go#L87
/**
 * Reads at most `size` bytes, starting at `addr`, from the first program
 * whose range `[vaddr, vaddr + filesz - 1]` contains `addr`.
 *
 * @param programs programs to search.
 * @param addr virtual address to start reading at.
 * @param size maximum number of bytes to read.
 * @returns the bytes read (possibly fewer than `size` when the program ends
 * earlier), or `undefined` if no program contains `addr`.
 */
function readData(
  programs: ElfProgram[],
  addr: number,
  size: number,
): Buffer | undefined {
  const owner = programs.find(
    ({ vaddr, filesz }) => vaddr <= addr && addr <= vaddr + filesz - 1,
  );
  if (owner === undefined) {
    return undefined;
  }

  // Bytes left in the program from `addr` on, capped at the requested size.
  const available = owner.vaddr + owner.filesz - addr;
  const length = available > size ? size : available;

  // Offset of `addr` from the beginning of the program.
  const offset = addr - owner.vaddr;
  return owner.data.slice(offset, offset + length);
}

/**
 * Reads a pointer-sized unsigned integer from the start of `buffer`.
 *
 * The parameter needs both a name and a type: written as `(Buffer) => number`
 * the parameter is merely *named* `Buffer` and is implicitly typed `any`.
 */
type ReadPtrFunc = (buffer: Buffer) => number;

// Source
// https://github.com/golang/go/blob/46f99ce7ea97d11b0a1a079da8dda0f51df2a2d2/src/cmd/go/internal/version/version.go#L189
/**
 * Returns the string whose header is located at address `addr` in the
 * binary. The header consists of two pointer-sized values: the address of
 * the string data followed by its length.
 *
 * @param binaryFile parsed ELF binary to read from.
 * @param ptrSize pointer size in bytes.
 * @param readPtr reads a pointer-sized value from the start of a buffer.
 * @param addr address of the string header.
 * @returns the string read as "binary", or an empty string when the header
 * or the string data cannot be read completely.
 */
function readString(
  binaryFile: Elf,
  ptrSize: number,
  readPtr: ReadPtrFunc,
  addr: number,
): string {
  const headerSize = 2 * ptrSize;
  const header = readData(binaryFile.body.programs, addr, headerSize);
  if (header === undefined || header.length < headerSize) {
    return "";
  }

  const stringAddr = readPtr(header);
  const stringLen = readPtr(header.slice(ptrSize));

  const bytes = readData(binaryFile.body.programs, stringAddr, stringLen);
  if (bytes === undefined || bytes.length < stringLen) {
    return "";
  }
  return bytes.toString("binary");
}

/**
 * Reports whether the file names come from a binary with trimmed paths.
 *
 * Trimmed binaries are detected by all file names being relative. There
 * usually is a `build` line in the Go binary's metadata that denotes whether
 * `trimpath` has been used, but there are binaries out there that have
 * trimmed paths without that annotation (for example kyverno@v1.8.1).
 *
 * @param files file names to inspect.
 * @returns true if no file name starts with the path separator.
 */
function isTrimmed(files: string[]): boolean {
  const hasAbsolutePath = files.some((name) => name.startsWith(path.sep));
  return !hasAbsolutePath;
}

// determinePaths works out the modCachePath and the vendorPath of a binary
// from the file names it contains.
// The modCachePath is the directory the modules are downloaded to. When
// building a Go binary, this is usually either $GOMODCACHE or
// $GOROOT/pkg/mod.
// The vendorPath is the directory holding vendored files, which is usually
// the main module's location + "/vendor".
//
// For binaries built with `-trimpath` all paths are trimmed away, so both
// returned values are empty.
export function determinePaths(
  modules: GoModule[],
  files: string[],
): { modCachePath: string; vendorPath: string } {
  const posixFiles = files.map((name) => path.normalize(name));
  const trimmed = isTrimmed(posixFiles);

  const modCachePath = trimmed
    ? ""
    : determineGoModCachePath(modules, posixFiles);
  const vendorPath = trimmed ? "" : determineVendorPath(modules, posixFiles);

  return { modCachePath, vendorPath };
}

/**
 * Determines the vendor directory of the binary's main module.
 *
 * A file containing `vendor/<module name>/` reveals a candidate root, e.g.
 * `/app/` for `/app/vendor/<module name>/...`. The candidate is accepted
 * only if another file shares that root without being inside the vendored
 * module, to make sure that this really is the vendor folder and not just a
 * random folder named `vendor` somewhere.
 *
 * @param modules modules of the binary.
 * @param files normalized file names found in the binary.
 * @returns the vendor path including a trailing separator, or an empty
 * string if none was found.
 */
function determineVendorPath(modules: GoModule[], files: string[]): string {
  for (const mod of Object.values(modules)) {
    // path.join keeps the paths linux-style even if the plugin runs on
    // Windows, which is what the Go binaries always contain.
    const vendoredModulePath = `${path.join("vendor", mod.name)}${path.sep}`;

    const vendoredFile = files.find((name) =>
      name.includes(vendoredModulePath),
    );
    if (!vendoredFile) {
      continue;
    }

    // Everything in front of "vendor/<module name>/" is the main module's
    // location; look for a file there that is not part of the vendored module.
    const [mainModulePath] = vendoredFile.split(vendoredModulePath);
    const sibling = files.find(
      (name) =>
        name.includes(mainModulePath) && !name.includes(vendoredModulePath),
    );
    if (sibling) {
      return path.join(mainModulePath, "vendor") + path.sep;
    }
  }
  return "";
}

/**
 * Determines the Go module cache path from the file names in the binary.
 *
 * Files in the module cache contain a module's full name as a path
 * component; whatever precedes the full name in the first such file is
 * taken as the cache path.
 *
 * @param modules modules of the binary.
 * @param files normalized file names found in the binary.
 * @returns the module cache path, or an empty string if no file contains
 * any module's full name.
 */
function determineGoModCachePath(modules: GoModule[], files: string[]): string {
  for (const mod of Object.values(modules)) {
    const cachedFile = files.find((name) =>
      name.includes(`${path.sep}${mod.fullName()}`),
    );
    if (!cachedFile) {
      continue;
    }
    const [prefix] = cachedFile.split(mod.fullName());
    return prefix;
  }
  return "";
}

/**
 * Removes `prefix` from the start of `s`.
 *
 * @param s string to trim.
 * @param prefix prefix to remove.
 * @returns `s` without the leading `prefix`, or `s` unchanged if it does not
 * start with `prefix`.
 */
function trimPrefix(s: string, prefix: string): string {
  return s.startsWith(prefix) ? s.slice(prefix.length) : s;
}
