let Papa: typeof import("papaparse"); /* lazily imported once a file is dropped on */
let XLSX: typeof import("xlsx/xlsx.mini"); /* lazily imported once a file is dropped on */
import { rgb } from "d3-color";
import { NewMetadata, AttrDetails } from "../updateMetadata/updateMetadata.types"

/**
 * ------------------------------------------------------
 * General notes about our handling of metadata TSVs/CSVs
 * ------------------------------------------------------
 *
 * NOTIFICATIONS are dispatched in a somewhat piecemeal fashion (and some
 * simply use the console). One day we will have a sidebar-style logging
 * interface which should improve this.
 *
 * BOOLEAN SCALES are not well handled in auspice. When we improve this the
 * values of the existence scale can be booleans themselves.
 *
 * DATE is a skipped field, but this should be improved
 *
 * SCALE TYPES are always categorical (except for the existence coloring)
 *
 */


/**
 * One entry of the `errors` array in `ParseResults`, i.e. of the results object
 * handed to the `complete` callback of `Papa.parse` in `parseCsv`.
 * Only `message` is read in this file.
 * NOTE(review): the other fields presumably mirror Papa Parse's error objects — confirm against its docs.
 */
interface CsvParseError {
  type: string;
  code: string;
  /** Human-readable description; messages are joined with ", " to form the thrown error text */
  message: string;
  row?: number;
}

/**
 * The `meta` part of `ParseResults`. Only `fields` is read in this file.
 * NOTE(review): the remaining properties presumably mirror Papa Parse's `meta` object — confirm against its docs.
 */
interface CsvMetadata {
  delimiter: string;
  linebreak: string;
  aborted: boolean;
  truncated: boolean;
  /** Column names from the header row; passed to `processHeader`, where the first entry becomes the strain key */
  fields?: string[];
}

/**
 * The value `parseCsv` resolves with: the results object passed to the
 * `complete` callback of `Papa.parse`.
 */
interface ParseResults {
  /** One record per data row, looked up by column name (e.g. `row[header.strainKey]`) */
  data: Record<string, string>[];
  /** Parse problems; a non-empty array causes `handleCsvLikeDroppedFile` to throw */
  errors: CsvParseError[];
  meta: CsvMetadata;
}

/**
 * Names of the columns which hold per-row coordinates. `processHeader` sets
 * these to either `latitude`/`longitude` or `__latitude`/`__longitude`.
 */
interface LatLongKeys {
  latitude: string;
  longitude: string;
}

/**
 * Working representation of a single coloring (attribute) while the file is
 * being processed. Converted into an entry of `NewMetadata.attributes` by
 * `handleCsvLikeDroppedFile`.
 */
interface AttrColoring {
  /** Used as both the `key` and the `name` of the resulting attribute */
  attrName: string;
  /**
   * The column the values are read from. Set by `processHeader`; not set on the
   * colorings created by `addExistenceColoring` and `processLatLongs`.
   */
  fieldName?: string;
  /** Within this file this is 'categorical', or 'boolean' for the existence coloring */
  scaleType: AttrDetails['scaleType'];
  /** Maps strain name to `{ value }` */
  strains: AttrDetails['strains'];
  /** `[value, color]` pairs; `processRows` fills this when per-row colors were provided */
  colors?: AttrDetails['colors'];
  /** The `<attrName>__colour` / `<attrName>__color` column, if the header contains one */
  colorScaleFieldName: string | undefined;
}

/**
 * Information derived from the header row by `processHeader`.
 */
interface Header {
  /** All column names, in file order */
  fields: string[];
  /** The first column name; each row's value in this column is treated as the strain name */
  strainKey: string;
  /** Columns holding latitude / longitude, or `undefined` if a complete pair was not found */
  latLongKeys: LatLongKeys | undefined;
  /** "Special case" columns which were deliberately skipped */
  ignoredFields: Set<string>;
}


/**
 * Reads the dropped file and converts it to the canonical `NewMetadata` structure
 * for merging with redux state. The resolved data structure has not been
 * cross-referenced with redux state, it's simply a representation of the file
 * contents.
 *
 * @param file - the dropped CSV-like (CSV / TSV / Excel) file
 * @param nodeNames - names of the nodes in the tree; rows whose strain is not
 *   in this set do not contribute coloring values
 * @returns the attributes (colorings) and geographic resolutions found in the file
 * @throws Error if the file could not be parsed, if it has no usable columns
 *   (including a missing header row), or if none of its rows match a node in
 *   the tree. As this is an async function these surface as a rejected promise.
 */
export async function handleCsvLikeDroppedFile(file: File, nodeNames: Set<string>): Promise<NewMetadata> {
  const { fileName, csvString } = await readDroppedFile(file);
  const { errors, data, meta } = await parseCsv(csvString);
  if (errors.length) throw new Error(errors.map((e) => e.message).join(", "));

  /* `meta.fields` may be absent (e.g. an empty file). Treat that as "no columns"
  rather than letting `processHeader` fail on `undefined` */
  const { colorings: headerColorings, header } = processHeader(meta.fields ?? []);
  if (headerColorings.length === 0) {
    throw new Error("No valid columns")
  }
  processRows(headerColorings, header, data, nodeNames);

  // Drop coloring for which no valid strains were found
  const colorings = headerColorings.filter((coloring) => Object.keys(coloring.strains).length)
  if (colorings.length === 0) {
    throw new Error("No matching nodes found in tree")
  }
  addExistenceColoring(colorings, fileName);

  /* NOTE: `processLatLongs` appends its placeholder coloring to `colorings`, so
  it must run before `attributes` is built below */
  const geographic = header.latLongKeys ?
    processLatLongs(data, colorings, header, `${fileName}_geo`) :
    [];

  return {
    attributes: Object.fromEntries(colorings.map((c) => [
      c.attrName,
      {
        key: c.attrName,
        name: c.attrName,
        scaleType: c.scaleType,
        strains: c.strains,
        colors: c.colors,
      }
    ])),
    geographic,
  }
}

/**
 * Promise wrapper around the callback-based `Papa.parse()`. The papaparse module
 * is imported on first use and cached in the module-level `Papa` binding.
 *
 * Resolves with the results given to the `complete` callback and rejects with
 * whatever is given to the `error` callback.
 *
 * A note on encoding: people will commonly drop CSVs exported from microsoft
 * excel, which causes all sorts of problems.
 * https://github.com/mholt/PapaParse/issues/169 suggests adding encoding: "ISO-8859-1"
 * to the config, which may work
 */
async function parseCsv(csvString: string): Promise<ParseResults> {
  if (!Papa) {
    const papaModule = await import("papaparse");
    Papa = papaModule.default;
  }
  return new Promise((resolve, reject) => {
    Papa.parse(csvString, {
      /* input format */
      header: true,
      delimiter: ",",
      comments: "#",
      encoding: "UTF-8",
      skipEmptyLines: true,
      dynamicTyping: false,
      /* callbacks */
      complete: (results: ParseResults) => resolve(results),
      error: (error: Error) => reject(error),
    });
  });
}

/**
 * Reads the dropped file via a `FileReader` and converts its contents to a CSV
 * string using the (lazily imported) XLSX library.
 *
 * @param file - the dropped file
 * @returns the file's name together with the CSV string of its (first) sheet
 * @throws rejects with the error thrown during conversion, or with the
 *   reader's error if the file could not be read
 */
async function readDroppedFile(file: File): Promise<{fileName: string; csvString: string}> {
  const fileName = file.name;
  const reader = new FileReader();
  return new Promise((resolve, reject) => {
    reader.onload = async (): Promise<void> => {
      try {
        /* the XLSX library will handle CSV, TSV, Excel etc, converting to a CSV string */
        /* If dropped file is Excel workbook, only reads in the data from the first sheet */
        if (!XLSX) XLSX = (await import("xlsx/xlsx.mini")).default;
        const workbook = XLSX.read(reader.result, { type: 'binary', raw: true });
        const firstSheet = workbook.Sheets[workbook.SheetNames[0]];
        const csvString = XLSX.utils.sheet_to_csv(firstSheet);
        resolve({ fileName, csvString });
      } catch (err) {
        reject(err)
      }
    }
    /* Reject with an actual error (which has a `message`) rather than with the
    ProgressEvent the handler is called with */
    reader.onerror = (): void => {
      reject(reader.error ?? new Error(`Unable to read file ${fileName}`));
    };
    reader.readAsBinaryString(file);
  })
}


/**
 * Parses the header row of the CSV file to initialise `colorings` and return info about the header.
 *
 * The first field is taken to be the strain column. Each remaining field becomes
 * a (categorical) coloring unless it is one of the special-case columns, a
 * lat/long column, an empty name, or a microreact-style `__shape` / `__colour` /
 * `__color` companion column.
 */
function processHeader(fields: string[]): {
  colorings: AttrColoring[];
  header: Header;
} {
  const [strainKey, ...candidateFields] = fields;

  /* "Special case" columns which we currently ignore (and report as ignored) */
  const specialCaseFields = new Set([
    "name", "div", "vaccine", "labels", "hidden", "mutations", "url", "authors", "accession", "traits", "children",
    "num_date", "year", "month", "date",
  ]);
  /* Coordinate columns never become colorings themselves */
  const coordinateFields = new Set(["__latitude", "__longitude", "latitude", "longitude"]);
  /* microreact-style suffixes which describe another column rather than hold data */
  const companionSuffixes = ["shape", "colour", "color"];

  const allFields = new Set(fields);
  const ignoredFields = new Set<string>();
  const colorings: AttrColoring[] = [];

  for (const fieldName of candidateFields) {
    if (specialCaseFields.has(fieldName)) {
      ignoredFields.add(fieldName);
      continue;
    }
    if (fieldName === '' || coordinateFields.has(fieldName)) continue;

    /* interpret column names using microreact-style syntax */
    let attrName = fieldName;
    if (fieldName.includes("__")) {
      const [prefix, suffix] = fieldName.split("__");
      // not tracked in `ignoredFields` as we don't want a user-facing warning
      if (companionSuffixes.includes(suffix)) continue;
      /* MicroReact uses this to colour things, but we do this by default */
      if (suffix === "autocolour") attrName = prefix;
    }

    /* link the coloring to a companion column of colors, preferring the `__colour` spelling */
    let colorScaleFieldName: string | undefined = undefined;
    if (allFields.has(`${attrName}__colour`)) {
      colorScaleFieldName = `${attrName}__colour`;
    } else if (allFields.has(`${attrName}__color`)) {
      colorScaleFieldName = `${attrName}__color`;
    }

    colorings.push({
      attrName,
      fieldName,
      scaleType: "categorical",
      strains: {},
      colors: [],
      colorScaleFieldName,
    });
  }

  /* check for the presence of lat/long fields (both of a pair must exist) */
  let latLongKeys: LatLongKeys | undefined = undefined;
  if (allFields.has("latitude") && allFields.has("longitude")) {
    latLongKeys = {latitude: "latitude", longitude: "longitude"};
  } else if (allFields.has("__latitude") && allFields.has("__longitude")) {
    latLongKeys = {latitude: "__latitude", longitude: "__longitude"};
  }

  return {
    colorings,
    header: { fields, strainKey, latLongKeys, ignoredFields },
  };
}

/**
 * Appends a (boolean) coloring, named after the file, which marks every strain
 * that has a value in at least one of the existing colorings.
 * The array is modified in place.
 */
function addExistenceColoring(colorings: AttrColoring[], fileName: string): void {
  const label = `Strains in ${fileName}`;

  /* Gather [strain, value] pairs from every coloring; strains seen more than
  once simply collapse when the pairs are turned into an object */
  const strainEntries: [string, { value: string }][] = [];
  for (const coloring of colorings) {
    for (const strain of Object.keys(coloring.strains)) {
      strainEntries.push([strain, { value: label }]);
    }
  }

  const existenceColoring: AttrColoring = {
    attrName: fileName,
    scaleType: 'boolean',
    strains: Object.fromEntries(strainEntries),
    colors: [],
    colorScaleFieldName: undefined,
  };
  colorings.push(existenceColoring);
}

/**
 * Fills in each coloring's `strains` (and, where a color column is linked,
 * its `colors`) from the data rows. The colorings are modified in place.
 *
 * Rows whose strain is not in `nodeNames` are skipped, as are empty values.
 * If a strain appears in multiple rows the last non-empty value wins.
 *
 * @param colorings - colorings initialised from the header; those without a `fieldName` are skipped
 * @param header - header info, used for the strain column and the ignored fields
 * @param data - parsed rows, keyed by column name
 * @param nodeNames - strain names which exist in the tree
 */
function processRows(colorings: AttrColoring[], header: Header, data: ParseResults['data'], nodeNames: Set<string>): void {
  for (const attrInfo of colorings) {
    const fieldName = attrInfo.fieldName;
    if (!fieldName) {
      console.error("[internal error] fieldName not set")
      continue;
    }
    if (header.ignoredFields.has(fieldName) || fieldName === header.strainKey) continue;

    /* Track per-row colors defined in the CSV. Since all CSV-based color schemes
    are non-continuous this data type will be ok. A Map (rather than a plain
    object) is used because the keys are arbitrary cell values, and values such
    as "constructor" or "__proto__" would otherwise hit inherited properties. */
    const colorsObserved = new Map<string, string[]>();

    for (const row of data) {
      const strain = row[header.strainKey];
      if (!nodeNames.has(strain)) continue;
      const value = row[fieldName];
      if (!value) continue; // skip empty strings (Note: values of `0`, `false` etc are all strings so not skipped)
      attrInfo.strains[strain] = { value };

      // Colors are defined per strain, so store them in a list so we can average as needed
      if (!attrInfo.colorScaleFieldName) continue;
      const hex = row[attrInfo.colorScaleFieldName];
      if (!hex) continue;
      const hexes = colorsObserved.get(value);
      if (hexes) {
        hexes.push(hex);
      } else {
        colorsObserved.set(value, [hex]);
      }
    }

    // Turn colors into [value, hex] pairs, averaging multiple reported colors as needed
    const colors: [string, string][] = [];
    for (const [value, hexes] of colorsObserved) {
      const averaged = _averageColor(hexes);
      if (averaged !== null) colors.push([value, averaged]);
    }
    if (colors.length) attrInfo.colors = colors;
  }
}

/**
 * Returns the average of a list of hex color values.
 *
 * Entries which are not 3- or 6-digit hex colors (`#abc` / `#aabbcc`) are
 * dropped, with a console warning, before averaging.
 *
 * @param hexes - the colors reported for a single value
 * @returns `null` if there are no valid colors; the color itself if exactly one
 *   is valid; otherwise the channel-wise mean as formatted by d3-color's `rgb().toString()`
 */
function _averageColor(hexes: string[]): string | null {
  if (hexes.length === 0) return null
  const hexPattern = /^#[A-Fa-f0-9]{3}([A-Fa-f0-9]{3})?$/;
  const validatedHexes = hexes.filter((h) => hexPattern.test(h));
  if (validatedHexes.length !== hexes.length) {
    /* de-duplicate via a Set; avoids relying on `Set.prototype.difference`
    which is only available in very recent runtimes */
    const dropped = Array.from(new Set(hexes.filter((h) => !hexPattern.test(h))));
    console.warn(`Validation of color hexes dropped these invalid values: ${dropped.join(', ')}`);
  }
  if (validatedHexes.length === 0) return null;
  /* must index the validated list: `hexes[0]` may be one of the dropped values */
  if (validatedHexes.length === 1) return validatedHexes[0]
  let r=0, g=0, b=0; // same algorithm as `getAverageColorFromNodes`
  validatedHexes.forEach((c) => {
    const tmpRGB = rgb(c);
    r += tmpRGB.r;
    g += tmpRGB.g;
    b += tmpRGB.b;
  });
  const total = validatedHexes.length;
  return rgb(r / total, g / total, b / total).toString();
}


/**
 * Metadata defines lat/longs per-sample which is orthogonal to Nextstrain's approach
 * which associates lat/longs to specific metadata values (e.g. to a specific country).
 * We get around this by creating a new placeholder attribute which represents the unique
 * lat/longs provided here.
 * P.S. Latitude: [-90, 90], Longitude: [-180, 180]
 *
 * Rows with an empty, non-numeric or out-of-range latitude or longitude are skipped.
 * Note that strains are not checked against the tree here.
 *
 * @param data - parsed rows, keyed by column name
 * @param colorings - the placeholder coloring is appended to this array (modified in place)
 * @param header - header info providing the strain column and the lat/long columns
 * @param attrName - name for both the placeholder coloring and the geographic resolution
 * @returns a single geographic resolution mapping each placeholder value ("deme")
 *   to its coordinates, or an empty array if the header has no lat/long columns
 */
function processLatLongs(
  data: ParseResults['data'],
  colorings: AttrColoring[],
  header: Header,
  attrName: string
): NewMetadata['geographic'] {
  if (!header.latLongKeys) return [];
  const { latitude: latKey, longitude: longKey } = header.latLongKeys;

  const coordsStrains = new Map<string, {
    deme: string;
    latitude: number;
    longitude: number;
    strains: Set<string>;
  }>();
  let demeCounter = 0;

  /* Collect groups of strains with identical lat/longs */
  for (const row of data) {
    const rawLatitude = row[latKey];
    const rawLongitude = row[longKey];
    /* `Number("")` is 0, so empty cells must be skipped explicitly or they'd be placed at (0, 0) */
    if (!rawLatitude?.trim() || !rawLongitude?.trim()) continue;
    const latitude = Number(rawLatitude);
    const longitude = Number(rawLongitude);
    if (Number.isNaN(latitude) || Number.isNaN(longitude) || latitude > 90 || latitude < -90 || longitude > 180 || longitude < -180) {
      continue;
    }
    /* The separator keeps e.g. (1, 23) and (12, 3) distinct */
    const coordsKey = `${latitude},${longitude}`;
    let group = coordsStrains.get(coordsKey);
    if (!group) {
      const deme = `deme_${demeCounter++}`; // visible to user as the attr value!
      group = { deme, latitude, longitude, strains: new Set<string>() };
      coordsStrains.set(coordsKey, group);
    }
    group.strains.add(row[header.strainKey]);
  }

  /* invert map to link each strain to a dummy value with lat/longs */
  // TODO XXX what about the shape here?
  const demes: Record<string, { latitude: number; longitude: number }> = {};
  const newGeoResolution = { key: attrName, demes };
  const attrColoring: AttrColoring = {
    attrName,
    scaleType: 'categorical',
    strains: {},
    colors: [],
    colorScaleFieldName: undefined,
  }
  for (const { deme, latitude, longitude, strains } of coordsStrains.values()) {
    demes[deme] = { latitude, longitude };
    for (const strain of strains) {
      attrColoring.strains[strain] = { value: deme }
    }
  }
  colorings.push(attrColoring);
  return [newGeoResolution];
}