import { genotypeColors } from "./globals";
import { defaultEntropyState } from "../reducers/entropy";

/**
 * Object used for user-provided JSON.
 * Stricter than simply `object` which implicitly types values as `any`.
 */
interface UnknownJsonObject {
  [key: string]: unknown
}

type Strand = '+' | '-' // other GFF-valid options are '.' and '?'

/**
 * Specifies the range of the each segment's corresponding position in the genome,
 * or defines the range of the genome (chromosome) itself.
 * Start is always less than or equal to end. 
 * Start is 1-based, End is 1-based closed. I.e. GFF.
 */
type RangeGenome = [number, number]

/**
 * Same as RangeGenome but now relative to the nucleotides which make up the CDS
 * (i.e. after slippage, splicing etc). The first CDS segment's RangeLocal will always
 * start at 1, and the end value (of the last segment) corresponds to the number of nt in the CDS:
 * range_segLast[1] - range_seg1[0] + 1 = 3 * number_of_amino_acids_in_translated_CDS
 */
type RangeLocal = [number, number]

interface ChromosomeMetadata {
  strandsObserved: Set<Strand>
  posStrandStackHeight: number
  negStrandStackHeight: number
}

interface Chromosome {
  name: string
  range: RangeGenome
  genes: Gene[]
  metadata: ChromosomeMetadata
}

interface Gene {
  name: string
  cds: CDS[]
}

interface CDS {
  /** length of the CDS in nucleotides. Will be a multiple of 3 */
  length: number
  segments: CdsSegment[]
  strand: Strand
  color: string
  name: string
  isWrapping: boolean
  displayName?: string
  description?: string
  stackPosition?: number
}

type Phase = 0 | 1 | 2

type Frame = 0 | 1 | 2

interface CdsSegment {
  rangeLocal: RangeLocal
  rangeGenome: RangeGenome

  /** 1-based */
  segmentNumber: number

  /** Indicates where the next codon begins relative to the 5' end of this segment */
  phase: Phase

  /** The frame the codons are in, relative to the 5' end of the genome. It thus takes into account the phase */
  frame: Frame
}

/**
 * This is in flux -- Richard's working on an updated representation for the JSON
 * Here we do our best to massage the JSON annotations block into a hierarchical
 * representation of Genome → Chromosome[] → Gene[] → CDS[] → CDS_Segment[].
 * The intention is for this structure to entirely replace the various other pieces of redux
 * state such as 'annotations', 'geneMap', 'geneLengths', 'genomeAnnotations'.
 *
 * Each key:value entry in the JSON annotation block, where key!=='nuc', is interpreted as
 * a CDS. There is currently no way to encode multiple CDS segments¹. Each CDS name
 * is unique, as JavaScript JSON parsing guarantees the keys to be unique (even if there are
 * duplicates in the JSON).
 *
 * By default, each CDS name (key) is set as the gene name as well, so 1 gene = 1 CDS.
 * We extend the JSON to allow `value.gene` which, if set, can group multiple CDSs into
 * a single gene. We also allow `value.color`, which sets the _gene_ colour (optional).
 *
 * ¹ The exception being a single CDS which wraps around the origin, which we are able
 * to split into two segments here.
 */
export const genomeMap = (annotations: UnknownJsonObject): Chromosome[] => {

  const nucAnnotation = Object.entries(annotations)
    .filter(([name,]) => name==='nuc')
    .map(([, annotation]) => annotation)[0];

  if (!nucAnnotation) {
    throw new Error("Genome annotation missing 'nuc' definition");
  }
  if (typeof nucAnnotation !== 'object') {
    throw new Error("Genome annotation for 'nuc' is not a JSON object.");
  }
  if (!('start' in nucAnnotation) || !('end' in nucAnnotation)) {
    throw new Error("Genome annotation for 'nuc' missing start or end");
  }
  if (typeof nucAnnotation.start !== 'number' || typeof nucAnnotation.end !== 'number') {
    throw new Error("Genome annotation for 'nuc.start' or 'nuc.end' is not a number.");
  }
  if ('strand' in nucAnnotation && nucAnnotation.strand === '-') {
    throw new Error("Auspice can only display genomes represented as positive strand." +
      "Note that -ve strand RNA viruses are typically annotated as 5' → 3'.");
  }

  const rangeGenome: RangeGenome =  [nucAnnotation.start, nucAnnotation.end];


  /* Group by genes -- most JSONs will not include this information, so it'll essentially be
  one CDS per gene, but that's just fine! */
  const annotationsPerGene: Record<string,Record<string, UnknownJsonObject>> = {};
  Object.entries(annotations)
    .filter(([name,]) => name!=='nuc')
    .map(([annotationKey, annotation]) => {

      if (typeof annotation !== 'object') {
        throw new Error(`Genome annotation for '${annotationKey}' is not a JSON object.`);
      }

      let geneName = annotationKey;

      if ('gene' in annotation) {
        if (typeof annotation.gene !== 'string') {
          throw new Error(`Genome annotation '${annotationKey}.gene' is not a string.`);
        }
        geneName = annotation.gene;
      }

      if (!(geneName in annotationsPerGene)) annotationsPerGene[geneName] = {};

      /* Assertion is safe: see docstring of UnknownJsonObject
       */
      // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
      annotationsPerGene[geneName][annotationKey] = annotation as UnknownJsonObject;
    })

  const nextColor = nextColorGenerator();

  const strandsObserved: Set<Strand> = new Set();

  const genes = Object.entries(annotationsPerGene)
    .map(([geneName, cdsAnnotations]) => {
      const gene: Gene = {
        name: geneName,
        cds: []
      }
      const defaultColor = nextColor.next().value; // default colours are per-gene (not per-CDS)

      gene.cds = Object.entries(cdsAnnotations)
        .map(([cdsName, annotation]) => cdsFromAnnotation(cdsName, annotation, rangeGenome, defaultColor))
        .filter((cds) => cds.name!=='__INVALID__');
      gene.cds.forEach((cds) => strandsObserved.add(cds.strand));
      return gene;
    })

  const metadata: ChromosomeMetadata = {
    strandsObserved,
    posStrandStackHeight: calculateStackPosition(genes, '+'),
    negStrandStackHeight: calculateStackPosition(genes, '-'),
  }

  const chromosome: Chromosome = {
    name: 'source',
    range: rangeGenome,
    genes,
    metadata
  }
  return [chromosome];
}

export const entropyCreateState = (genomeAnnotations: UnknownJsonObject): unknown => {
  if (genomeAnnotations) {
    try {
      return {
        showCounts: false,
        loaded: true,
        genomeMap: genomeMap(genomeAnnotations)
      };
    } catch (e) {
      if (e instanceof Error) console.error(e.message);
      console.error("Genotype colorings and the entropy panel will not be available.")
      // fallthrough
    }
  }
  return defaultEntropyState();
};


function validColor(color: string | undefined | unknown): false | string {
  if (typeof color !== "string") return false;
  return color; // TODO XXX
}

function* nextColorGenerator(): Generator<string> {
  let i=0;
  while (true) {
    yield genotypeColors[i++];
    if (i===genotypeColors.length) i=0;
  }
}

/**
 * Returns a CDS object parsed from the provided JsonAnnotation block
 */
function cdsFromAnnotation(
  cdsName: string,
  annotation: UnknownJsonObject,
  rangeGenome: RangeGenome,
  defaultColor: string | void,
): CDS {
  const invalidCds: CDS = {
    name: '__INVALID__',
    length: 0,
    segments: [],
    strand: '+',
    isWrapping: false,
    color: '#000',
  }
  const strand = annotation.strand;
  if (!(strand==='+' || strand==='-')) {
    /** GFF allows for strands '?' (features whose strandedness is relevant, but unknown) and '.' (features that are not stranded),
     * which are represented by augur as '?' and null, respectively. (null comes from `None` in python.)
     * In both cases it's not a good idea to make an assumption of strandedness, or to assume it's even a CDS. */
    console.error(`[Genome annotation]  ${cdsName} has strand ` + 
      (annotation.strand !== undefined ? annotation.strand : '(missing)') +
      ". This CDS will be ignored.");
    return invalidCds;
  }
  const positive = strand==='+';
  
  let length = 0;  // rangeLocal length
  const segments: CdsSegment[] = [];
  if (annotation.start && annotation.end) {

    if (typeof annotation.start !== 'number' || typeof annotation.end !== 'number') {
      console.error(`[Genome annotation] ${cdsName} start (${annotation.start}) and/or end (${annotation.end}) is not a number.`);
      return invalidCds;
    }

    /* The simplest case is where a JSON annotation block defines a
    contiguous CDS, however it may be a wrapping CDS (i.e. cds end > genome
    end */
    if (annotation.end <= rangeGenome[1]) {
      length = annotation.end-annotation.start+1;    
      segments.push({
        segmentNumber: 1,
        rangeLocal: [1, length],
        rangeGenome: [annotation.start, annotation.end],
        phase: 0,
        frame: _frame(annotation.start, annotation.end, 0, rangeGenome[1], positive),
      })
    } else {
      /* We turn this into the equivalent JsonSegments to minimise code duplication */
      annotation.segments = [
        {start: annotation.start, end: rangeGenome[1]},
        {start: 1, end: annotation.end-rangeGenome[1]}
      ]
      // TypeScript is unable to infer that annotation.segments is an array,
      // hence the explicit type guard.
      if (Array.isArray(annotation.segments)){
        /* -ve strand segments are 3' -> 5', so segment[0] is at the start of the genome */
        if (!positive) annotation.segments.reverse();
      }
    }
  }

  if (annotation.segments && Array.isArray(annotation.segments)) {
    if (segments.length) { // indicates we've already created one from start/stop coords
      console.error(`[Genome annotation] ${cdsName} defines both start/stop and segments, but they are mutually exclusive.`);
      return invalidCds;
    }
    let previousRangeLocalEnd = 0;
    let segmentNumber = 1;
    for (const segment of annotation.segments) {
      /* The segments, as defined in the JSON, must be ordered according to the order the appear in the CDS.
      For -ve strand that's 3' to 5'. The rangeGenome within each segment is always 5' to 3'. */
      const segmentLength = segment.end - segment.start + 1; // in nucleotides
      /* phase is the number of nucs we need to add to the so-far-observed length to make it mod 3 */
      const phase: Phase = length%3===0 ? 0 : (length%3===1 ? 2 : 1);

      const s: CdsSegment = {
        segmentNumber: segmentNumber++,
        rangeLocal: [previousRangeLocalEnd+1, previousRangeLocalEnd+segmentLength],
        rangeGenome: [segment.start, segment.end],
        phase,
        frame: _frame(segment.start, segment.end, phase, rangeGenome[1], positive)
      };
      segments.push(s);
      length += segmentLength;
      previousRangeLocalEnd += segmentLength;

    }
  }
  if (!segments.length) {
    console.error(`[Genome annotation] ${cdsName} requires either start+end or segments to be defined`);
    return invalidCds;
  }

  if (length%3) {
    console.error(`[Genome annotation] ${cdsName} has length ${length} which is not a multiple of 3`);
    return invalidCds; // skip parsing of this CDS's annotation block
  }

  const cds: CDS = {
    name: cdsName,
    length,
    segments,
    strand,
    isWrapping: _isCdsWrapping(strand, segments),
    color: validColor(annotation.color) || defaultColor || '#000',
  }
  if (typeof annotation.display_name === 'string') {
    cds.displayName = annotation.display_name;
  }
  if (typeof annotation.description === 'string') {
    cds.description = annotation.description;
  }
  return cds
}

/**
 * Calculates the (open reading) frame the provided segment is in.
 * For +ve strand this is calculated 5'->3', for -ve strand it's 3'->5'.
 * The frame is calculated once the CDS is back in phase.
 */
function _frame(
  /** 1 based, rangeGenome[0] of the segment */
  start: number,

  /**
   * 1 based, rangeGenome[1] of the segment.
   * start < end always
   */
  end: number,

  phase: Phase,

  /** 1 based */
  genomeLength: number,
  positiveStrand: boolean,
): Frame {
  /* TypeScript cannot infer the exact range of values from a modulo operation,
   * so it is manually provided.
   */
  // eslint-disable-next-line @typescript-eslint/consistent-type-assertions
  return (positiveStrand ?
    (start+phase-1)%3 :
    Math.abs((end-phase-genomeLength)%3)) as 0 | 1 | 2;
}

/**
 * Given a list of genes (each with CDSs), we want to calculate and assign each
 * CDS a "stack position" such that each CDS can be plotted with no overlaps.
 * All segments of a given CDS will have the same stack position. (Stack here
 * refers to this being a stacking problem.) The stack position starts at 1.
 * Returns the maximum position observed.
 */
function calculateStackPosition(
  genes: Gene[],
  strand: Strand,
): number {
  /* List of CDSs, sorted by their earliest occurrence in the genome (for any segment) */
  const cdss = genes
    .reduce((acc: CDS[], gene) => [...acc, ...gene.cds], [])
    .filter((cds) => cds.strand===strand)
    .sort((a, b) =>
      Math.min(...a.segments.map((s) => s.rangeGenome[0])) < Math.min(...b.segments.map((s) => s.rangeGenome[0])) ?
        -1 : 1
    );
  let stack: CDS[] = []; // current CDSs in stack
  for (const newCds of cdss) {
    /* remove any CDS from the stack which has ended (completely) before this one starts */
    const newMinStart = Math.min(...newCds.segments.map((s) => s.rangeGenome[0]));
    stack = stack.filter((cds) =>
     !(Math.max(...cds.segments.map((s) => s.rangeGenome[1])) < newMinStart)
    );
    // console.log("\nstacK:", stack.map((cds) => cds.name).join(", "));
    // console.log("\tconsideing", newCds.name)
    /* If there are any empty slots in the current stack, take the lowest! */
    const existingY = stack.map((cds) => cds.stackPosition || 0).sort();
    const empty = _emptySlots(existingY);
    if (empty) {
      // console.log("\t\ttaking empty slot", empty)
      newCds.stackPosition = empty;
      stack.push(newCds);
      continue;
    }
    /* If any CDS in the stack has a single space (i.e. between 2 segments) into which the entire
    new CDS (i.e. all segments of newCds) can fit into, then we can re-use that position */
    const reuseablePosition = _fitCdssTogether(stack, newCds);
    if (reuseablePosition) {
      // console.log("\t\treusing position", reuseablePosition)
      newCds.stackPosition = reuseablePosition;
      stack.push(newCds);
      continue;
    }
    /* fallthrough: use a higher position! */
    newCds.stackPosition = (existingY[existingY.length-1] || 0) + 1;
    // console.log("\t\tAdding to the top!", newCds.stackPosition)
    stack.push(newCds);
  }

  return Math.max(...cdss.map((cds) => cds.stackPosition || 0));
}

/**
 * Given an array of sorted integers, if there are any spaces (starting with 1)
 * then return the value which can fill that space. Returns 0 if no spaces.
 */
function _emptySlots(values: number[]): number {
  if ((values[0] || 0) > 1) return 1;
  for (let i=1; i<values.length; i++) {
    /* intermediate variables because of https://github.com/microsoft/TypeScript/issues/46253 */
    const [a, b] = [values[i-1], values[i]];
    if (a && b && b-a>1) return a+1;
  }
  return 0;
}

/**
 * If the newCds completely (i.e. all of its segments) fits inside a single
 * between-segment space of an existing segment, then return the stackPosition
 * of that existing CDS. Otherwise return 0;
 */
function _fitCdssTogether(
  existing: CDS[],
  newCds: CDS,
): number {
  const a = Math.min(...newCds.segments.map((s) => s.rangeGenome[0]));
  const b = Math.max(...newCds.segments.map((s) => s.rangeGenome[1]));
  for (const cds of existing) {
    if (cds.segments.length===1) continue;
    const segments = [...cds.segments]
    segments.sort((a, b) => a.rangeGenome[0]<b.rangeGenome[1] ? -1 : 1)
    for (let i = 0; i<segments.length-1; i++) {
      const end = segments[i]?.rangeGenome[1] || 0;
      const nextStart = segments[i+1]?.rangeGenome[0] || 0;
      const stackPosition = cds.stackPosition || 0;
      if (end<a && nextStart>b) {
        /* yes - we can fit into the same position as this cds, but check if
        another CDS in the stack is occupying this space first! */
        let spaceTaken = false;
        existing.forEach((el) => {
          if (el.stackPosition!==stackPosition) return; // only consider same row
          if (spaceTaken) return; // saves time
          el.segments.forEach((s) => {
            if (s.rangeGenome[1]>=a && s.rangeGenome[0]<=b) {
              spaceTaken = true
            }
          })
        })
        if (!spaceTaken) {
          return stackPosition;
        }
      }
    }
  }
  return 0;
}


/* Does a CDS wrap the origin? */
function _isCdsWrapping(
  strand: Strand,
  segments: CdsSegment[],
): boolean {
  const positive = strand==='+';
  // segments ordered to guarantee rangeLocal will always be greater (than the previous segment)
  let prevSegment: CdsSegment;
  for (const segment of segments) {
    if (prevSegment) {
      if (positive && prevSegment.rangeGenome[0] > segment.rangeGenome[0]) {
        return true;
      }
      if (!positive && prevSegment.rangeGenome[0] < segment.rangeGenome[0]) {
        return true;
      }
    }
    prevSegment = segment;
  }
  return false; // fallthrough
}
