// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at http://mozilla.org/MPL/2.0/.

// Copyright (c) 2018 Alexandre Storelli

// Online implementation of the landmark audio fingerprinting algorithm.
// inspired by D. Ellis (2009), "Robust Landmark-Based Audio Fingerprinting"
// http://labrosa.ee.columbia.edu/matlab/fingerprint/
// itself inspired by Wang 2003 paper

// This module exports Codegen, an instance of stream.Transform
// By default, the writable side must be fed with an input signal with the following properties:
// - single channel
// - 16bit PCM
// - 22050 Hz sampling rate
//
// The readable side outputs objects of the form
// { tcodes: [time stamps], hcodes: [fingerprints] }

import FFT from './lib/fft.ts'

interface CodegenOptions {
  verbose: boolean
  samplingRate: number
  bps: number
  mnlm: number
  mppp: number
  nfft: number
  step: number
  dt: number
  hwin: number[]
  maskDecayLog: number
  ifMin: number
  ifMax: number
  windowDf: number
  windowDt: number
  pruningDt: number
  maskDf: number
  eww: number[][]
}

interface CodegenUserOpts {
  verbose?: boolean
  samplingRate?: number
  bps?: number
  mnlm?: number
  mppp?: number
  nfft?: number
  step?: number
  dt?: number
  hwin?: number[]
  maskDecayLog?: number
  ifMin?: number
  ifMax?: number
  windowDf?: number
  windowDt?: number
  pruningDt?: number
  maskDf?: number
  eww?: number[][]
}

const buildOptions = (options: CodegenUserOpts): CodegenOptions => {
  const verbose = options.verbose ?? false

  // sampling rate in Hz. If you change this, you must adapt windowDt and pruningDt below to match your needs
  // set the Nyquist frequency, SAMPLING_RATE/2,
  // so as to match the max frequencies you want to get landmark fingerprints.
  const samplingRate = options.samplingRate ?? 22050

  // bytes per sample, 2 for 16 bit PCM. If you change this, you must change readInt16LE methods in the code.
  const bps = options.bps ?? 2

  // maximum number of local maxima for each spectrum. useful to tune the amount of fingerprints at output
  const mnlm = options.mnlm ?? 5

  // maximum of hashes each peak can lead to. useful to tune the amount of fingerprints at output
  const mppp = options.mppp ?? 3

  // size of the FFT window. As we use real signals, the spectra will have nfft/2 points.
  // Increasing it will give more spectral precision, less temporal precision.
  // It may be good or bad depending on the sounds you want to match,
  // and on whether your input is deformed by EQ or noise.
  const nfft = options.nfft ?? 512

  // 50 % overlap
  // if SAMPLING_RATE is 22050 Hz, this leads to a sampling frequency
  // fs = (SAMPLING_RATE / step) /s = 86/s, or dt = 1/fs = 11,61 ms.
  // It's not really useful to change the overlap ratio.
  const step = options.step ?? (nfft / 2)

  const dt = options.dt ?? (1 / (samplingRate / step))

  const hwin = options.hwin ??
    Array(nfft).fill(null).map((_f, i) => (
      0.5 * (1 - Math.cos(((2 * Math.PI) * i) / (nfft - 1)))
    ))

  // threshold decay factor between frames.
  const maskDecayLog = options.maskDecayLog ?? Math.log(0.995)

  // frequency window to generate landmark pairs, in units of DF = SAMPLING_RATE / NFFT. Values between 0 and NFFT/2
  // you can increase this to avoid having fingerprints for low frequencies
  const ifMin = options.ifMin ?? 0
  // you don't really want to decrease this, better reduce SAMPLING_RATE instead for faster computation.
  const ifMax = options.ifMax ?? nfft / 2

  // we set this to avoid getting fingerprints linking very different frequencies.
  // useful to reduce the amount of fingerprints. this can be maxed at NFFT/2 if you wish.
  const windowDf = options.windowDf ?? 60

  // time window to generate landmark pairs. time in units of dt (see definition above)
  // a little more than 1 sec.
  const windowDt = options.windowDt ?? 96
  // about 250 ms, window to remove previous peaks that are superseded by later ones.
  // tune the pruningDt value to match the effects of maskDecayLog.
  // also, pruningDt controls the latency of the pipeline. higher pruningDt = higher latency
  const pruningDt = options.pruningDt ?? 24

  // prepare the values of exponential masks.
  // mask decay scale in DF units on the frequency axis.
  const maskDf = options.maskDf ?? 3
  // gaussian mask is a polynom when working on the log-spectrum. log(exp()) = Id()
  // MASK_DF is multiplied by Math.sqrt(i+3) to have wider masks at higher frequencies
  // see the visualization out-thr.png for better insight of what is happening
  const eww = options.eww ??
    Array(nfft / 2).fill(null).map((_f, i) => (
      Array(nfft / 2).fill(null).map((_f, j) => (
        -0.5 * Math.pow((j - i) / maskDf / Math.sqrt(i + 3), 2)
      ))
    ))

  return {
    verbose,
    samplingRate,
    bps,
    mnlm,
    mppp,
    nfft,
    step,
    dt,
    hwin,
    maskDecayLog,
    ifMin,
    ifMax,
    windowDf,
    windowDt,
    pruningDt,
    maskDf,
    eww
  }
}

interface Mark {
  t: number
  i: number[]
  v: number[]
}

export interface CodegenBuffer {
  tcodes: number[]
  hcodes: number[]
}

class Codegen {
  options: CodegenOptions
  buffer: Uint8Array
  bufferDelta: number
  stepIndex: number
  marks: Mark[]
  threshold: number[]
  fft: FFT

  constructor (options?: CodegenUserOpts) {
    this.options = buildOptions(options ?? {})

    this.buffer = new Uint8Array(0)
    this.bufferDelta = 0

    this.stepIndex = 0
    this.marks = []
    this.threshold = Array(this.options.nfft).fill(null).map(() => -3)

    this.fft = new FFT(this.options.nfft)
  }

  process (chunk: Uint8Array): CodegenBuffer {
    const {
      verbose,
      bps,
      mnlm,
      mppp,
      nfft,
      step,
      hwin,
      maskDecayLog,
      ifMin,
      ifMax,
      windowDf,
      windowDt,
      pruningDt,
      eww
    } = this.options

    if (verbose) {
      const t = Math.round(this.stepIndex / step).toString()
      const received = chunk.length.toString()
      console.log(`t=${t} received ${received} bytes`)
    }

    const tcodes: number[] = []
    const hcodes: number[] = []

    const concatedBuffer = new Uint8Array(this.buffer.length + chunk.length)
    concatedBuffer.set(this.buffer, 0)
    concatedBuffer.set(chunk, this.buffer.length)
    this.buffer = concatedBuffer

    const bufferView = new DataView(concatedBuffer.buffer)

    while ((this.stepIndex + nfft) * bps < this.buffer.length + this.bufferDelta) {
      const data = new Array(nfft) // window data
      const image = new Array(nfft).fill(0)

      // fill the data, windowed (HWIN) and scaled
      for (let i = 0, limit = nfft; i < limit; i++) {
        const readInt = bufferView.getInt16((this.stepIndex + i) * bps - this.bufferDelta, true)
        data[i] = (hwin[i] * readInt) / Math.pow(2, 8 * bps - 1)
      }
      this.stepIndex += step

      this.fft.forward(data, image) // compute FFT

      // log-normal surface
      for (let i = ifMin; i < ifMax; i += 1) {
        // the lower part of the spectrum is damped,
        // the higher part is boosted, leading to a better peaks detection.
        this.fft.spectrum[i] = Math.abs(this.fft.spectrum[i]) * Math.sqrt(i + 16)
      }

      // positive values of the difference between log spectrum and threshold
      const diff = new Array(nfft / 2)
      for (let i = ifMin; i < ifMax; i += 1) {
        diff[i] = Math.max(Math.log(Math.max(1e-6, this.fft.spectrum[i])) - this.threshold[i], 0)
      }

      // find at most MNLM local maxima in the spectrum at this timestamp.
      const iLocMax = new Array(mnlm)
      const vLocMax = new Array(mnlm)
      for (let i = 0; i < mnlm; i += 1) {
        iLocMax[i] = NaN
        vLocMax[i] = Number.NEGATIVE_INFINITY
      }
      for (let i = ifMin + 1; i < ifMax - 1; i += 1) {
        if (
          diff[i] > diff[i - 1] &&
          diff[i] > diff[i + 1] &&
          this.fft.spectrum[i] > vLocMax[mnlm - 1]
        ) { // if local maximum big enough
          // insert the newly found local maximum in the ordered list of maxima
          for (let j = mnlm - 1; j >= 0; j -= 1) {
            // navigate the table of previously saved maxima
            if (j >= 1 && this.fft.spectrum[i] > vLocMax[j - 1]) continue
            for (let k = mnlm - 1; k >= j + 1; k -= 1) {
              iLocMax[k] = iLocMax[k - 1] // offset the bottom values
              vLocMax[k] = vLocMax[k - 1]
            }
            iLocMax[j] = i
            vLocMax[j] = this.fft.spectrum[i]
            break
          }
        }
      }

      // now that we have the MNLM highest local maxima of the spectrum,
      // update the local maximum threshold so that only major peaks are taken into account.
      for (let i = 0; i < mnlm; i += 1) {
        if (vLocMax[i] > Number.NEGATIVE_INFINITY) {
          for (let j = ifMin; j < ifMax; j += 1) {
            this.threshold[j] = (
              Math.max(this.threshold[j], Math.log(this.fft.spectrum[iLocMax[i]]) + eww[iLocMax[i]][j])
            )
          }
        } else {
          vLocMax.splice(i, mnlm - i) // remove the last elements.
          iLocMax.splice(i, mnlm - i)
          break
        }
      }

      // array that stores local maxima for each time step
      this.marks.push({ t: Math.round(this.stepIndex / step), i: iLocMax, v: vLocMax })

      // remove previous (in time) maxima that would be too close and/or too low.
      const nm = this.marks.length
      const t0 = nm - pruningDt - 1
      for (let i = nm - 1; i >= Math.max(t0 + 1, 0); i -= 1) {
        for (let j = 0; j < this.marks[i].v.length; j += 1) {
          if (
            this.marks[i].i[j] !== 0 &&
            Math.log(this.marks[i].v[j]) < (
              this.threshold[this.marks[i].i[j]] + maskDecayLog * (nm - 1 - i)
            )
          ) {
            this.marks[i].v[j] = Number.NEGATIVE_INFINITY
            this.marks[i].i[j] = Number.NEGATIVE_INFINITY
          }
        }
      }

      // generate hashes for peaks that can no longer be pruned. stepIndex:{f1:f2:deltaindex}
      let nFingersTotal = 0
      if (t0 >= 0) {
        const m = this.marks[t0]

        for (let i = 0; i < m.i.length; i += 1) {
          let nFingers = 0
          let canBreak = false

          for (let j = t0; j >= Math.max(0, t0 - windowDt); j -= 1) {
            if (canBreak) break
            const m2 = this.marks[j]

            for (let k = 0; k < m2.i.length; k += 1) {
              if (canBreak) break
              if (m2.i[k] !== m.i[i] && Math.abs(m2.i[k] - m.i[i]) < windowDf) {
                tcodes.push(m.t)
                hcodes.push(m2.i[k] + (nfft / 2) * (m.i[i] + (nfft / 2) * (t0 - j)))
                nFingers += 1
                nFingersTotal += 1
                if (nFingers >= mppp) canBreak = true
              }
            }
          }
        }
      }
      if (nFingersTotal > 0 && verbose) {
        console.log(`t=${Math.round(this.stepIndex / step)} generated ${nFingersTotal} fingerprints`)
      }

      this.marks.splice(0, t0 + 1 - windowDt)

      // decrease the threshold for the next iteration
      for (let j = 0; j < this.threshold.length; j += 1) {
        this.threshold[j] += maskDecayLog
      }
    }

    if (this.buffer.length > 1000000) {
      const delta = this.buffer.length - 20000
      this.bufferDelta += delta
      this.buffer = this.buffer.slice(delta)
    }

    return { tcodes, hcodes }
  }
}

export default Codegen
