import type { TranscribeOptions, TranscribeResult, VadOptions } from '../index'
import type { WavFileWriterFs } from '../utils/WavFileWriter'

// === Audio Stream Interfaces ===

/**
 * One chunk of audio, as handed to callbacks registered through
 * `AudioStreamInterface.onData`.
 */
export interface AudioStreamData {
  /**
   * Raw audio bytes of this chunk.
   * NOTE(review): the sample encoding is not stated in this file
   * (presumably PCM) — confirm with the stream implementation.
   */
  data: Uint8Array
  /** Sample rate of `data` (presumably in Hz — confirm). */
  sampleRate: number
  /** Number of audio channels contained in `data`. */
  channels: number
  /**
   * Time associated with this chunk.
   * NOTE(review): unit and epoch are not specified here — confirm.
   */
  timestamp: number
}

/**
 * Capture settings accepted by `AudioStreamInterface.initialize` and
 * exposed to users via `RealtimeOptions.audioStreamConfig`.
 *
 * Every field is optional; the fallback used for an omitted field is decided
 * by the stream implementation and is not visible in this file.
 */
export interface AudioStreamConfig {
  /** Requested sample rate (presumably in Hz — confirm). */
  sampleRate?: number
  /** Requested number of audio channels. */
  channels?: number
  /** Requested bit depth per sample. */
  bitsPerSample?: number
  /**
   * Requested buffer size.
   * NOTE(review): unit (bytes vs. samples/frames) is not specified here — confirm.
   */
  bufferSize?: number
  /**
   * Numeric identifier of the audio source to capture from.
   * NOTE(review): looks platform-specific; the meaning of the number is
   * defined by the stream implementation — confirm.
   */
  audioSource?: number
}

/**
 * Contract for an audio source that can be supplied as
 * `RealtimeTranscriberDependencies.audioStream`.
 *
 * Lifecycle and callback-ordering guarantees are defined by the concrete
 * implementation; only the shape of the API is declared here.
 */
export interface AudioStreamInterface {
  /** Prepares the stream with the given capture settings. */
  initialize(config: AudioStreamConfig): Promise<void>
  /** Starts capturing audio. */
  start(): Promise<void>
  /** Stops capturing audio. */
  stop(): Promise<void>
  /** Reports whether the stream is currently recording. */
  isRecording(): boolean
  /** Registers the callback that receives each captured `AudioStreamData` chunk. */
  onData(callback: (data: AudioStreamData) => void): void
  /** Registers the callback that receives error messages (as plain strings). */
  onError(callback: (error: string) => void): void
  /** Registers the callback that is told about recording-state changes. */
  onStatusChange(callback: (isRecording: boolean) => void): void
  /**
   * Registers a callback for the end of the stream.
   * Optional: implementations may omit this method, so callers must check
   * for its presence before calling it.
   */
  onEnd?(callback: () => void): void
  /** Releases the stream. */
  release(): Promise<void>
}

// === Enhanced VAD Options ===

/**
 * Ready-made VAD (voice activity detection) parameter sets, keyed by use case.
 *
 * | preset            | threshold | min speech | min silence | max speech | pad    | overlap | intended for                      |
 * | ----------------- | --------- | ---------- | ----------- | ---------- | ------ | ------- | --------------------------------- |
 * | default           | 0.5       | 250 ms     | 100 ms      | 30 s       | 30 ms  | 0.1     | balanced performance              |
 * | sensitive         | 0.3       | 100 ms     | 50 ms       | 15 s       | 50 ms  | 0.2     | quiet environments                |
 * | very-sensitive    | 0.2       | 100 ms     | 50 ms       | 15 s       | 100 ms | 0.3     | catching even quiet speech        |
 * | conservative      | 0.7       | 500 ms     | 200 ms      | 25 s       | 20 ms  | 0.05    | avoiding false positives          |
 * | very-conservative | 0.8       | 750 ms     | 300 ms      | 20 s       | 10 ms  | 0.05    | only clear speech                 |
 * | continuous        | 0.4       | 200 ms     | 300 ms      | 60 s       | 50 ms  | 0.15    | presentations / lectures          |
 * | meeting           | 0.45      | 300 ms     | 150 ms      | 45 s       | 75 ms  | 0.2     | multiple speakers                 |
 * | noisy             | 0.75      | 400 ms     | 100 ms      | 25 s       | 40 ms  | 0.1     | noisy rooms (stricter thresholds) |
 *
 * Parameters:
 * - threshold: 0.0-1.0 (lower = more sensitive)
 * - minSpeechDurationMs: minimum duration to consider something speech
 * - minSilenceDurationMs: minimum silence before speech is considered ended
 * - maxSpeechDurationS: maximum continuous speech duration
 * - speechPadMs: padding added around detected speech
 * - samplesOverlap: overlap between analysed segments.
 *   NOTE(review): previously documented here as a 0.0-1.0 ratio; the upstream
 *   whisper.cpp VAD option of the same name is expressed in seconds — confirm.
 */
export const VAD_PRESETS = (() => {
  // Assembles a single preset. Arguments follow the column order of the
  // table above; properties are emitted in that same order.
  const preset = (
    threshold: number,
    minSpeechDurationMs: number,
    minSilenceDurationMs: number,
    maxSpeechDurationS: number,
    speechPadMs: number,
    samplesOverlap: number,
  ) => ({
    threshold,
    minSpeechDurationMs,
    minSilenceDurationMs,
    maxSpeechDurationS,
    speechPadMs,
    samplesOverlap,
  })

  return {
    // General purpose
    default: preset(0.5, 250, 100, 30, 30, 0.1),

    // Increasingly eager to report speech
    sensitive: preset(0.3, 100, 50, 15, 50, 0.2),
    'very-sensitive': preset(0.2, 100, 50, 15, 100, 0.3),

    // Increasingly reluctant to report speech
    conservative: preset(0.7, 500, 200, 25, 20, 0.05),
    'very-conservative': preset(0.8, 750, 300, 20, 10, 0.05),

    // Scenario-specific
    continuous: preset(0.4, 200, 300, 60, 50, 0.15), // longer segments
    meeting: preset(0.45, 300, 150, 45, 75, 0.2),
    noisy: preset(0.75, 400, 100, 25, 40, 0.1),
  }
})()

/**
 * Voice-activity event, delivered through `RealtimeTranscriberCallbacks.onVad`
 * and optionally attached to transcription events / `onBeginTranscribe` input.
 */
export interface RealtimeVadEvent {
  /** What the VAD observed for this event. */
  type: 'speech_start' | 'speech_end' | 'speech_continue' | 'silence'
  /** Time of the event. NOTE(review): unit/epoch not specified here — confirm. */
  timestamp: number
  /** Time at which speech was last detected (same time base as `timestamp`, presumably). */
  lastSpeechDetectedTime: number
  /** Confidence value reported for the detection (presumably 0.0-1.0 — confirm). */
  confidence: number
  /** Duration associated with the event. NOTE(review): unit not specified here — confirm. */
  duration: number
  /** Index of the audio slice the event refers to. */
  sliceIndex: number

  /** Additional signal-level context; may be absent. */
  analysis?: {
    averageAmplitude: number
    peakAmplitude: number
    spectralCentroid?: number
    zeroCrossingRate?: number
  }

  // Adaptive threshold info (both optional)
  /** Detection threshold in effect when the event was produced. */
  currentThreshold?: number
  /** Estimated background-noise level. NOTE(review): scale not specified here — confirm. */
  environmentNoise?: number
}

/**
 * Event delivered through `RealtimeTranscriberCallbacks.onTranscribe`.
 */
export interface RealtimeTranscribeEvent {
  /** Phase of the transcription this event reports. */
  type: 'start' | 'transcribe' | 'end' | 'error'
  /** Index of the audio slice the event refers to. */
  sliceIndex: number
  /** Transcription result, when one is available for this event. */
  data?: TranscribeResult
  /** Whether audio is still being captured. */
  isCapturing: boolean
  /** Time spent processing. NOTE(review): unit not specified here (presumably ms) — confirm. */
  processTime: number
  /** Recording time covered. NOTE(review): unit not specified here (presumably ms) — confirm. */
  recordingTime: number
  /**
   * Snapshot of slice memory consumption; may be absent.
   * Has the same fields as the `MemoryUsage` interface declared in this file.
   */
  memoryUsage?: {
    slicesInMemory: number
    totalSamples: number
    estimatedMB: number
  }
  /** VAD event associated with this transcription event, if any. */
  vadEvent?: RealtimeVadEvent
}

/**
 * User-facing configuration of the realtime transcriber.
 * Every field is optional; documented defaults are the ones stated next to
 * each field (they are applied by the transcriber, not by this type).
 */
export interface RealtimeOptions {
  // Audio settings
  /** Length of one audio slice, in seconds. Default: 25. */
  audioSliceSec?: number
  /** Minimum amount of audio, in seconds. Default: 1. */
  audioMinSec?: number
  /** Maximum number of slices kept in memory. Default: 3. */
  maxSlicesInMemory?: number

  // Transcription settings
  /** Options forwarded to the transcription call. */
  transcribeOptions?: TranscribeOptions

  // Prompt settings
  /** Initial prompt to use for transcription. */
  initialPrompt?: string
  /** Add transcription results from previous slices as prompt. Default: true. */
  promptPreviousSlices?: boolean

  // File settings (Only used if fs dependency is provided)
  /** Path the captured audio is written to. */
  audioOutputPath?: string

  // Audio stream configuration
  /** Capture settings for the audio stream. */
  audioStreamConfig?: AudioStreamConfig

  // Logger settings
  /** Custom logger function. Default: noop. */
  logger?: (message: string) => void

  // Realtime transcription settings
  /** Interval between realtime updates, in milliseconds. Default: 200. */
  realtimeProcessingPauseMs?: number
  /** Wait before the first realtime transcription, in milliseconds. Default: 200. */
  initRealtimeAfterMs?: number
}

/**
 * A contiguous piece of recorded audio together with its bookkeeping state.
 */
export interface AudioSlice {
  /** Position of this slice in the sequence of slices. */
  index: number
  /** Audio bytes held by the slice. */
  data: Uint8Array
  /** Number of audio samples accounted to this slice. */
  sampleCount: number
  /** Start of the slice. NOTE(review): unit/time base not specified here — confirm. */
  startTime: number
  /** End of the slice (same time base as `startTime`, presumably). */
  endTime: number
  /** Flag marking the slice as processed. */
  isProcessed: boolean
  /** Flag marking the slice as released. */
  isReleased: boolean
}

/**
 * `AudioSlice` metadata without the `data` payload — every other field of
 * `AudioSlice` is inherited unchanged.
 */
export interface AudioSliceNoData extends Omit<AudioSlice, 'data'> { }

/**
 * Summary of how much audio is currently held in memory.
 */
export interface MemoryUsage {
  /** Number of slices currently kept in memory. */
  slicesInMemory: number
  /** Total number of audio samples across those slices. */
  totalSamples: number
  /** Estimated memory footprint in megabytes. */
  estimatedMB: number
}

/**
 * Statistics snapshot delivered through
 * `RealtimeTranscriberCallbacks.onStatsUpdate`.
 */
export interface RealtimeStatsEvent {
  /** Time of the snapshot. NOTE(review): unit/epoch not specified here — confirm. */
  timestamp: number
  /** What triggered this snapshot. */
  type:
  | 'slice_processed'
  | 'vad_change'
  | 'memory_change'
  | 'status_change'
  /** Transcriber state at the time of the snapshot. */
  data: {
    isActive: boolean
    isTranscribing: boolean
    vadEnabled: boolean
    // NOTE(review): the three stats fields below are untyped (`any`); their
    // shape is defined by whatever produces the event. Narrowing them would
    // be a breaking change for consumers that read properties off them.
    audioStats: any
    vadStats: any
    sliceStats: any
  }
}

/**
 * Optional hooks a consumer can supply to observe or steer the realtime
 * transcriber. Every callback may be omitted.
 */
export interface RealtimeTranscriberCallbacks {
  /**
   * Asynchronous hook receiving the slice audio and metadata before
   * transcription.
   * NOTE(review): presumably resolving to `false` skips transcribing the
   * slice — confirm against the transcriber implementation.
   */
  onBeginTranscribe?: (sliceInfo: {
    audioData: Uint8Array
    sliceIndex: number
    duration: number
    vadEvent?: RealtimeVadEvent
  }) => Promise<boolean>
  /** Receives transcription events. */
  onTranscribe?: (event: RealtimeTranscribeEvent) => void
  /**
   * Asynchronous hook receiving the slice audio and metadata before VAD.
   * NOTE(review): presumably resolving to `false` skips VAD for the slice —
   * confirm against the transcriber implementation.
   */
  onBeginVad?: (sliceInfo: {
    audioData: Uint8Array
    sliceIndex: number
    duration: number
  }) => Promise<boolean>
  /** Receives voice-activity events. */
  onVad?: (event: RealtimeVadEvent) => void
  /** Receives error messages (as plain strings). */
  onError?: (error: string) => void
  /** Receives changes of the transcriber's active state. */
  onStatusChange?: (isActive: boolean) => void
  /** Receives statistics snapshots. */
  onStatsUpdate?: (event: RealtimeStatsEvent) => void
  /**
   * Receives the text of a slice once its transcription has stabilized.
   * NOTE(review): the stabilization criterion is not visible in this file.
   */
  onSliceTranscriptionStabilized?: (text: string) => void
}

// === Context Interfaces ===

/**
 * Minimal shape of a Whisper context that can be supplied as
 * `RealtimeTranscriberDependencies.whisperContext`.
 */
export type WhisperContextLike = {
  /**
   * Starts transcribing the given audio buffer.
   *
   * Returns synchronously with a handle: `promise` settles with the
   * `TranscribeResult`, and `stop` is an async function for stopping the job.
   */
  transcribeData: (
    data: ArrayBuffer,
    options: TranscribeOptions,
  ) => {
    stop: () => Promise<void>
    promise: Promise<TranscribeResult>
  }
}

/**
 * Minimal shape of a VAD context that analyses a complete audio buffer.
 */
export type WhisperVadContextLike = {
  /**
   * Runs speech detection over the given audio buffer.
   *
   * Resolves to a list of detected segments, each described by a `t0`/`t1`
   * pair (presumably segment start and end).
   * NOTE(review): the time unit of `t0`/`t1` is not specified here — confirm.
   */
  detectSpeechData: (
    data: ArrayBuffer,
    options: VadOptions,
  ) => Promise<Array<{ t0: number; t1: number }>>
}

/**
 * Streaming VAD contract: audio is pushed in with `processAudio` and results
 * are reported through the callbacks registered via the `on*` members.
 * Can be supplied as `RealtimeTranscriberDependencies.vadContext`.
 */
export interface RealtimeVadContextLike {
  // Push audio data to the VAD context
  processAudio(data: Uint8Array): void
  // Register the callback invoked when speech is detected (receives the
  // confidence value and the related audio data)
  onSpeechStart: (callback: (confidence: number, data: Uint8Array) => void) => void
  // Register the callback invoked while detected speech continues (receives
  // the confidence value and the related audio data)
  onSpeechContinue: (callback: (confidence: number, data: Uint8Array) => void) => void
  // Register the callback invoked when speech ends (receives the confidence
  // value only)
  onSpeechEnd: (callback: (confidence: number) => void) => void
  // Register the callback invoked when VAD encounters an error
  onError: (callback: (error: string) => void) => void
  // Update VAD options; only the supplied fields need to be present
  updateOptions(options: Partial<VadOptions>): void
  // Force flush remaining audio data in the VAD context
  flush(): Promise<void>
  // Reset the VAD context
  reset(): Promise<void>
}

/**
 * Collaborators injected into the realtime transcriber.
 */
export interface RealtimeTranscriberDependencies {
  /** Whisper context used to transcribe audio. Required. */
  whisperContext: WhisperContextLike
  /** Streaming VAD context. Optional. */
  vadContext?: RealtimeVadContextLike
  /** Source of audio chunks. Required. */
  audioStream: AudioStreamInterface
  /**
   * File-system adapter for the WAV file writer. Optional; the file settings
   * in `RealtimeOptions` (e.g. `audioOutputPath`) are only used when this is
   * provided.
   */
  fs?: WavFileWriterFs
}
