import dlBin_ from 'datalib/src/bins/bins';
import {inferAll} from 'datalib/src/import/type';
import {summary} from 'datalib/src/stats';
import {autoMaxBins} from 'vega-lite/build/src/bin';
import {Channel} from 'vega-lite/build/src/channel';
import {containsTimeUnit, convert, TimeUnit, TIMEUNIT_PARTS} from 'vega-lite/build/src/timeunit';
import * as TYPE from 'vega-lite/build/src/type';
import {DEFAULT_QUERY_CONFIG, QueryConfig} from './config';
import {BinQuery, EncodingQuery, FieldQuery, isAutoCountQuery} from './query/encoding';
import {ExpandedType} from './query/expandedtype';
import {cmp, duplicate, extend, keys} from './util';

// Local alias for the default import of datalib's bins module; binSummary calls it to compute a binning scheme.
// NOTE(review): the reason for aliasing instead of using the import directly is not visible in this file.
const dlBin = dlBin_;

/**
 * Table Schema Field Descriptor interface
 * see: https://specs.frictionlessdata.io/table-schema/
 *
 * Describes a single field of a table. `FieldSchema` extends this interface
 * with the statistics computed by `build`.
 */
export interface TableSchemaFieldDescriptor {
  /** Name of the field. */
  name: string;

  /** A nicer human-readable label or title for the field. */
  title?: string;

  /** Primitive type of the field (see `PrimitiveType`), e.g. number, integer, string, datetime. */
  type: PrimitiveType;

  /** A string specifying a format. */
  format?: string;

  /** A description for the field. */
  description?: string;
}

/**
 * Field Schema
 *
 * A Table Schema field descriptor augmented with the Vega-Lite type and the
 * summary statistics that `build` derives from the data.
 */
export interface FieldSchema extends TableSchemaFieldDescriptor {
  /** Vega-Lite (expanded) type assigned to the field. */
  vlType?: ExpandedType;

  /** Position of the field after the `Schema` constructor has sorted the fields. */
  index?: number;
  // Need to keep original index for re-exporting TableSchema
  originalIndex?: number;

  /** Summary profile of the raw field values. */
  stats: DLFieldProfile;
  /** Summary profiles of the binned field, keyed by the `maxbins` value used for binning. */
  binStats?: {[maxbins: string]: DLFieldProfile};
  /** Summary profiles of the field after applying a time unit, keyed by that time unit. */
  timeStats?: {[timeUnit: string]: DLFieldProfile};

  // array of valid input values (fields)
  // `Schema.domain` returns this array as-is for fields whose vlType is ordinal.
  ordinalDomain?: string[];
}

/**
 * Table Schema
 * see: https://specs.frictionlessdata.io/table-schema/
 *
 * Generic over the field descriptor type `F`, so the same shape can hold either
 * plain Table Schema descriptors or the richer `FieldSchema` objects.
 */
export interface TableSchema<F extends TableSchemaFieldDescriptor> {
  /** Descriptors of the table's fields. */
  fields: F[];
  // The properties below are not interpreted in this file; `build` only copies
  // them through to the derived schema. See the Table Schema specification for their meaning.
  missingValues?: string[];
  primaryKey?: string | string[];
  foreignKeys?: object[];
}

/**
 * Build a Schema object.
 *
 * @param data - a set of raw data in the same format that Vega-Lite / Vega takes
 * Basically, it's an array in the form of:
 *
 * [
 *   {a: 1, b:2},
 *   {a: 2, b:3},
 *   ...
 * ]
 *
 * @param opt - query config; its properties override those of DEFAULT_QUERY_CONFIG
 * @param tableSchema - optional Table Schema; a field descriptor whose name matches
 * an inferred field is merged over the inferred field schema, and the remaining
 * top-level properties are copied to the derived table schema
 *
 * @return a Schema object
 */
export function build(
  data: any,
  opt: QueryConfig = {},
  tableSchema: TableSchema<TableSchemaFieldDescriptor> = {fields: []}
): Schema {
  opt = extend({}, DEFAULT_QUERY_CONFIG, opt);

  // create profiles for each variable
  const summaries: DLFieldProfile[] = summary(data);
  const types = inferAll(data); // inferAll does stronger type inference than summary

  // look-up table from field name to the user-provided field descriptor
  const tableSchemaFieldIndex = tableSchema.fields.reduce((m, field: TableSchemaFieldDescriptor) => {
    m[field.name] = field;
    return m;
  }, {});

  const fieldSchemas: FieldSchema[] = summaries.map(function(fieldProfile, index) {
    const name: string = fieldProfile.field;
    // In Table schema, 'date' doesn't include time so use 'datetime'
    const type: PrimitiveType = types[name] === 'date' ? PrimitiveType.DATETIME : (types[name] as any);
    const distinct: number = fieldProfile.distinct;
    let vlType: ExpandedType;

    if (type === PrimitiveType.NUMBER) {
      vlType = TYPE.QUANTITATIVE;
    } else if (type === PrimitiveType.INTEGER) {
      // use ordinal or nominal when cardinality of integer type is relatively low and the distinct values are less than an amount specified in options
      if (distinct < opt.numberNominalLimit && distinct / fieldProfile.count < opt.numberNominalProportion) {
        vlType = TYPE.NOMINAL;
      } else {
        vlType = TYPE.QUANTITATIVE;
      }
    } else if (type === PrimitiveType.DATETIME) {
      vlType = TYPE.TEMPORAL;
      // need to get correct min/max of date data because datalib's summary method does not
      // calculate this correctly for date types.
      const extent = temporalExtent(data, name);
      if (extent) {
        fieldProfile.min = extent.min;
        fieldProfile.max = extent.max;
      } else {
        // no row holds a usable date: keep the previous behavior of deriving both ends from the first row
        fieldProfile.min = new Date(data[0][name]);
        fieldProfile.max = new Date(data[0][name]);
      }
    } else {
      vlType = TYPE.NOMINAL;
    }

    // a nominal field whose values are (almost) all distinct is treated as a key
    if (
      vlType === TYPE.NOMINAL &&
      distinct / fieldProfile.count > opt.minPercentUniqueForKey &&
      fieldProfile.count > opt.minCardinalityForKey
    ) {
      vlType = ExpandedType.KEY;
    }

    let fieldSchema = {
      name: name,
      // Need to keep original index for re-exporting TableSchema
      originalIndex: index,
      vlType: vlType,
      type: type,
      stats: fieldProfile,
      timeStats: {} as {[timeUnit: string]: DLFieldProfile},
      binStats: {} as {[key: string]: DLFieldProfile}
    };

    // extend field schema with table schema field - if present
    const orgFieldSchema = tableSchemaFieldIndex[fieldSchema.name];
    fieldSchema = extend(fieldSchema, orgFieldSchema);

    return fieldSchema;
  });

  // calculate preset bins for quantitative and temporal data
  for (const fieldSchema of fieldSchemas) {
    if (fieldSchema.vlType === TYPE.QUANTITATIVE) {
      for (const maxbins of opt.enum.binProps.maxbins) {
        fieldSchema.binStats[maxbins] = binSummary(maxbins, fieldSchema.stats);
      }
    } else if (fieldSchema.vlType === TYPE.TEMPORAL) {
      for (const unit of opt.enum.timeUnit) {
        if (unit !== undefined) {
          fieldSchema.timeStats[unit] = timeSummary(unit, fieldSchema.stats);
        }
      }
    }
  }

  const derivedTableSchema: TableSchema<FieldSchema> = {
    ...tableSchema,
    fields: fieldSchemas
  };

  return new Schema(derivedTableSchema);
}

/**
 * Compute the earliest and latest date found in `data` under the property `name`.
 * Null / undefined values and values that do not parse to a valid date are skipped,
 * so they can neither pull the minimum down to the epoch nor poison the comparison with NaN.
 *
 * @return the extent as Date objects, or null when no row holds a valid date
 */
function temporalExtent(data: any[], name: string): {min: Date; max: Date} | null {
  let minTime = Infinity;
  let maxTime = -Infinity;
  for (const dataEntry of data) {
    const value = dataEntry[name];
    if (value === null || value === undefined) {
      // `new Date(null)` is the epoch, not a missing value, so it must not take part in the extent
      continue;
    }
    const time = new Date(value).getTime();
    if (isNaN(time)) {
      continue;
    }
    if (time < minTime) {
      minTime = time;
    }
    if (time > maxTime) {
      maxTime = time;
    }
  }
  return minTime <= maxTime ? {min: new Date(minTime), max: new Date(maxTime)} : null;
}

// Rank of each vlType, used by the Schema constructor to sort the field schemas
// (a lower rank sorts first). This orders the fields in the UI.
const order = {
  nominal: 0,
  key: 1,
  ordinal: 2,
  temporal: 3,
  quantitative: 4
};

/**
 * Wraps a table schema of profiled fields and answers questions about them
 * (type, cardinality, domain, statistics) by field name.
 */
export class Schema {
  private _tableSchema: TableSchema<FieldSchema>;
  private _fieldSchemaIndex: {[field: string]: FieldSchema};

  /**
   * @param tableSchema - table schema to wrap. Its `fields` array is sorted in place
   * and each field schema receives an `index` reflecting the sorted position.
   */
  constructor(tableSchema: TableSchema<FieldSchema>) {
    this._tableSchema = tableSchema;

    tableSchema.fields.sort(function(a: FieldSchema, b: FieldSchema) {
      // first order by vlType: nominal < key < ordinal < temporal < quantitative
      if (order[a.vlType] < order[b.vlType]) {
        return -1;
      } else if (order[a.vlType] > order[b.vlType]) {
        return 1;
      } else {
        // then order by field (alphabetically)
        return a.name.localeCompare(b.name);
      }
    });

    // Add index for sorting
    tableSchema.fields.forEach((fieldSchema, index) => (fieldSchema.index = index));

    // look-up table from field name to field schema
    this._fieldSchemaIndex = tableSchema.fields.reduce((m, fieldSchema: FieldSchema) => {
      m[fieldSchema.name] = fieldSchema;
      return m;
    }, {});
  }

  /** @return a list of the field names (for enumerating). */
  public fieldNames() {
    return this._tableSchema.fields.map(fieldSchema => fieldSchema.name);
  }

  /** @return a list of FieldSchemas */
  public get fieldSchemas() {
    return this._tableSchema.fields;
  }

  /** @return the FieldSchema of the given field, or undefined if the field is unknown */
  public fieldSchema(fieldName: string) {
    return this._fieldSchemaIndex[fieldName];
  }

  /** @return a copy of the table schema with the fields in their original order */
  public tableSchema() {
    // the fieldschemas are re-arranged
    // but this is not allowed in table schema.
    // so we will re-order based on original index.
    const tableSchema = duplicate(this._tableSchema);
    tableSchema.fields.sort((a, b) => a.originalIndex - b.originalIndex);
    return tableSchema;
  }

  /**
   * @return primitive type of the field if exist, otherwise return null
   */
  public primitiveType(fieldName: string) {
    return this._fieldSchemaIndex[fieldName] ? this._fieldSchemaIndex[fieldName].type : null;
  }

  /**
   * @return vlType of measurement of the field if exist, otherwise return null
   */
  public vlType(fieldName: string) {
    return this._fieldSchemaIndex[fieldName] ? this._fieldSchemaIndex[fieldName].vlType : null;
  }

  /** @return cardinality of the field associated with fieldQ, null if the field doesn't exist.
   *  Aggregated / auto-count queries always have cardinality 1, and augmented single time units
   *  have a fixed cardinality; neither requires the field to exist.
   *  @param augmentTimeUnitDomain - TimeUnit field domains will not be augmented if explicitly set to false.
   *  @param excludeInvalid - if true, invalid values (null, NaN, 'Invalid Date') are not counted
   *  for time-unit and raw fields.
   */
  public cardinality(fieldQ: FieldQuery, augmentTimeUnitDomain: boolean = true, excludeInvalid: boolean = false) {
    const fieldSchema = this._fieldSchemaIndex[fieldQ.field as string];
    if (fieldQ.aggregate || (isAutoCountQuery(fieldQ) && fieldQ.autoCount)) {
      return 1;
    } else if (fieldQ.bin) {
      if (!fieldSchema) {
        // unknown field: there are no stats to bin
        return null;
      }
      // fieldQ.bin will either be a boolean or a BinQuery
      let bin: BinQuery;
      if (typeof fieldQ.bin === 'boolean') {
        // autoMaxBins defaults to 10 if channel is Wildcard
        bin = {
          maxbins: autoMaxBins(fieldQ.channel as Channel)
        };
      } else if (fieldQ.bin === '?') {
        // NOTE: a wildcard bin carries no maxbins, so the look-up below uses `undefined` as maxbins
        bin = {
          enum: [true, false]
        };
      } else {
        bin = fieldQ.bin;
      }
      const maxbins: any = bin.maxbins;
      if (!fieldSchema.binStats) {
        // binStats is optional on FieldSchema; create the cache on first use
        fieldSchema.binStats = {};
      }
      if (!fieldSchema.binStats[maxbins]) {
        // need to calculate
        fieldSchema.binStats[maxbins] = binSummary(maxbins, fieldSchema.stats);
      }
      // don't need to worry about excludeInvalid here because invalid values don't affect linearly binned field's cardinality
      return fieldSchema.binStats[maxbins].distinct;
    } else if (fieldQ.timeUnit) {
      if (augmentTimeUnitDomain) {
        switch (fieldQ.timeUnit) {
          // TODO: this should not always be the case once Vega-Lite supports turning off domain augmenting (VL issue #1385)
          case TimeUnit.SECONDS:
            return 60;
          case TimeUnit.MINUTES:
            return 60;
          case TimeUnit.HOURS:
            return 24;
          case TimeUnit.DAY:
            return 7;
          case TimeUnit.DATE:
            return 31;
          case TimeUnit.MONTH:
            return 12;
          case TimeUnit.QUARTER:
            return 4;
          case TimeUnit.MILLISECONDS:
            return 1000;
        }
      }
      if (!fieldSchema) {
        // unknown field: there are no stats to derive the time-unit cardinality from
        return null;
      }
      let unit = fieldQ.timeUnit as string;
      let timeStats = fieldSchema.timeStats;
      // if the cardinality for the timeUnit is not precomputed, calculate it
      // (the result is kept in a local copy; the field schema is not modified)
      if (!timeStats || !timeStats[unit]) {
        timeStats = {
          ...timeStats,
          [unit]: timeSummary(fieldQ.timeUnit as TimeUnit, fieldSchema.stats)
        };
      }

      if (excludeInvalid) {
        return timeStats[unit].distinct - invalidCount(timeStats[unit].unique, ['Invalid Date', null]);
      } else {
        return timeStats[unit].distinct;
      }
    } else {
      if (fieldSchema) {
        if (excludeInvalid) {
          return fieldSchema.stats.distinct - invalidCount(fieldSchema.stats.unique, [NaN, null]);
        } else {
          return fieldSchema.stats.distinct;
        }
      } else {
        return null;
      }
    }
  }

  /**
   * Given an EncodingQuery with a timeUnit, returns true if the date field
   * has multiple distinct values for all parts of the timeUnit. Returns undefined
   * if the timeUnit is undefined.
   * i.e.
   * ('yearmonth', [Jan 1 2000, Feb 2 2000] returns false)
   * ('yearmonth', [Jan 1 2000, Feb 2 2001] returns true)
   */
  public timeUnitHasVariation(fieldQ: FieldQuery): boolean {
    if (!fieldQ.timeUnit) {
      return;
    }

    // if there is no variation in `date`, there should not be variation in `day`
    if (fieldQ.timeUnit === TimeUnit.DAY) {
      const dateEncQ: EncodingQuery = extend({}, fieldQ, {timeUnit: TimeUnit.DATE});
      if (this.cardinality(dateEncQ, false, true) <= 1) {
        return false;
      }
    }

    let fullTimeUnit = fieldQ.timeUnit;
    for (let timeUnitPart of TIMEUNIT_PARTS) {
      if (containsTimeUnit(fullTimeUnit as TimeUnit, timeUnitPart)) {
        // Create a clone of fieldQ, but with a single time unit part
        const singleUnitEncQ = extend({}, fieldQ, {timeUnit: timeUnitPart});
        if (this.cardinality(singleUnitEncQ, false, true) <= 1) {
          return false;
        }
      }
    }
    return true;
  }

  /**
   * @return the domain of the given field: [min, max] for quantitative and datetime fields,
   * the sorted distinct values otherwise (or `ordinalDomain` for ordinal fields that define one).
   * Unlike the other accessors, this method does not guard against an unknown field.
   */
  public domain(fieldQueryParts: {field: string}): any[] {
    // TODO: differentiate for field with bin / timeUnit
    const fieldSchema = this._fieldSchemaIndex[fieldQueryParts.field as string];
    let domain: any[] = keys(fieldSchema.stats.unique);
    if (fieldSchema.vlType === TYPE.QUANTITATIVE) {
      // return [min, max], coerced into number types
      return [+fieldSchema.stats.min, +fieldSchema.stats.max];
    } else if (fieldSchema.type === PrimitiveType.DATETIME) {
      // return [min, max] dates
      return [fieldSchema.stats.min, fieldSchema.stats.max];
    } else if (fieldSchema.type === PrimitiveType.INTEGER || fieldSchema.type === PrimitiveType.NUMBER) {
      // coerce non-quantitative numerical data into number type
      domain = domain.map(x => +x);
      return domain.sort(cmp);
    } else if (fieldSchema.vlType === TYPE.ORDINAL && fieldSchema.ordinalDomain) {
      return fieldSchema.ordinalDomain;
    }

    return domain
      .map(x => {
        // Convert 'null' to null as it is encoded similarly in datalib.
        // This is wrong when it is a string 'null' but that rarely happens.
        return x === 'null' ? null : x;
      })
      .sort(cmp);
  }

  /**
   * @return a Summary corresponding to the field of the given EncodingQuery, or null if the field is unknown
   */
  public stats(fieldQ: FieldQuery) {
    // TODO: differentiate for field with bin / timeUnit vs without
    const fieldSchema = this._fieldSchemaIndex[fieldQ.field as string];
    return fieldSchema ? fieldSchema.stats : null;
  }
}

/**
 * Derive the profile of a field after binning it with at most `maxbins` bins.
 * The given summary is left untouched; the returned copy has `unique`, `distinct`,
 * `min` and `max` replaced by the values of the binned field.
 *
 * @return a summary of the binning scheme determined from the given max number of bins
 */
function binSummary(maxbins: number, summary: DLFieldProfile): DLFieldProfile {
  const scheme = dlBin({min: summary.min, max: summary.max, maxbins: maxbins});

  // number of bins covered by the scheme
  const binCount = (scheme.stop - scheme.start) / scheme.step;

  // copy the pre-binning summary, then override the bin-dependent properties
  return extend({}, summary, {
    unique: binUnique(scheme, summary.unique),
    distinct: binCount,
    min: scheme.start,
    max: scheme.stop
  });
}

/** @return a copy of the passed summary whose unique and distinct reflect the values after applying the timeunit.
 *  In the unique dictionary, the string key 'null' is counted under the null value and
 *  unparseable dates are counted under 'Invalid Date'.
 */
function timeSummary(timeunit: TimeUnit, summary: DLFieldProfile): DLFieldProfile {
  const counts: {[value: string]: number} = {};

  for (const rawValue of keys(summary.unique)) {
    let bucket: string;
    if (rawValue === 'null') {
      // never hand null to the Date constructor: it would be turned into an actual date
      bucket = null;
    } else {
      const parsed = new Date(rawValue);
      if (isNaN(parsed.getTime())) {
        // an unparseable string still yields a Date object, but one with a NaN timestamp
        bucket = 'Invalid Date';
      } else if (timeunit === TimeUnit.DAY) {
        bucket = parsed.getDay().toString();
      } else {
        bucket = convert(timeunit, parsed).toString();
      }
    }
    counts[bucket] = (counts[bucket] || 0) + summary.unique[rawValue];
  }

  const result = extend({}, summary);
  result.unique = counts;
  result.distinct = keys(counts).length;

  return result;
}

/**
 * Re-key a dictionary of value counts by the bin each value falls into.
 *
 * @param bin - binning scheme; only its `value(x)` method, which maps a number to its bin, is used
 * @param oldUnique - dictionary from (stringified) raw value to its count
 * @return a new unique object based off of the old unique count and a binning scheme.
 * Counts of null values are kept under 'null' and counts of non-numeric values under 'NaN'.
 */
function binUnique(bin: any, oldUnique: any) {
  const newUnique: {[bucket: string]: number} = {};
  for (const value in oldUnique) {
    let bucket: string;
    // object keys are always strings, so a null value shows up as the key 'null'
    if (value === 'null') {
      bucket = 'null';
    } else if (isNaN(Number(value))) {
      bucket = 'NaN';
    } else {
      bucket = String(bin.value(Number(value)));
    }
    newUnique[bucket] = (newUnique[bucket] || 0) + oldUnique[value];
  }
  return newUnique;
}

/** @return how many of the items in list are keys of unique with a truthy (non-zero) count */
function invalidCount(unique: {}, list: any[]) {
  let matches = 0;
  for (const candidate of list) {
    if (unique[candidate]) {
      matches = matches + 1;
    }
  }
  return matches;
}

/**
 * Primitive types a field can have; used as the `type` of a Table Schema field descriptor,
 * as opposed to the field's Vega-Lite type.
 *
 * NOTE(review): every member is a string literal cast through `any`. Presumably this is a
 * workaround from before TypeScript supported string enums — confirm before removing the
 * casts, since doing so may change the code emitted for the enum.
 */
export enum PrimitiveType {
  STRING = 'string' as any,
  NUMBER = 'number' as any,
  INTEGER = 'integer' as any,
  BOOLEAN = 'boolean' as any,
  DATETIME = 'datetime' as any
}
