import { ResourceSource } from './common';
import { RnExecutorchError } from '../errors/errorUtils';

/**
 * Union of all built-in Text to Speech model names.
 * Used as the type of `KokoroConfig.modelName`.
 * @category Types
 */
export type TextToSpeechModelName = 'kokoro-small' | 'kokoro-medium';

/**
 * Lists all the languages available in TTS models (as language shorthands).
 * Used as the type of `VoiceConfig.lang`.
 * @category Types
 */
export type TextToSpeechLanguage =
  | 'en-us' // American English
  | 'en-gb'; // British English

/**
 * Voice configuration.
 *
 * So far in Kokoro, each voice is directly associated with a language.
 * @category Types
 * @property {TextToSpeechLanguage} lang - speaker's language
 * @property {ResourceSource} voiceSource - a source to a binary file with voice embedding
 * @property {KokoroVoiceExtras} [extra] - optional extra sources or properties related to a specific voice
 */
export interface VoiceConfig {
  lang: TextToSpeechLanguage;
  voiceSource: ResourceSource;
  extra?: KokoroVoiceExtras; // ... add more possible types
}

/**
 * Kokoro-specific voice extra props.
 *
 * Attached to a voice through the optional `VoiceConfig.extra` field.
 * Both sources are required whenever this object is provided.
 * @category Types
 * @property {ResourceSource} taggerSource - source to Kokoro's tagger model binary
 * @property {ResourceSource} lexiconSource - source to Kokoro's lexicon binary
 */
export interface KokoroVoiceExtras {
  taggerSource: ResourceSource;
  lexiconSource: ResourceSource;
}

/**
 * Kokoro model configuration.
 *
 * Contains only the core Kokoro model sources; phonemizer sources are
 * included in the voice configuration (`VoiceConfig`) instead.
 * @category Types
 * @property {TextToSpeechModelName} modelName - model name identifier
 * @property {ResourceSource} durationPredictorSource - source to Kokoro's duration predictor model binary
 * @property {ResourceSource} synthesizerSource - source to Kokoro's synthesizer model binary
 */
export interface KokoroConfig {
  modelName: TextToSpeechModelName;
  durationPredictorSource: ResourceSource;
  synthesizerSource: ResourceSource;
}

/**
 * General Text to Speech module configuration.
 *
 * Pairs a model with a speaker's voice; both fields are required.
 * @category Types
 * @property {KokoroConfig} model - a selected T2S model
 * @property {VoiceConfig} voice - a selected speaker's voice
 */
export interface TextToSpeechConfig {
  model: KokoroConfig; // ... add other model types in the future
  voice: VoiceConfig;
}

/**
 * Props for the `useTextToSpeech` hook.
 *
 * Inherits `model` and `voice` from `TextToSpeechConfig` and adds a hook-specific flag.
 * @category Types
 * @augments TextToSpeechConfig
 * @property {boolean} [preventLoad] - Boolean that can prevent automatic model loading (and downloading the data if you load it for the first time) after running the hook.
 */
export interface TextToSpeechProps extends TextToSpeechConfig {
  preventLoad?: boolean;
}

/**
 * Text to Speech module input definition.
 *
 * Note that `text` is optional at the type level.
 * NOTE(review): presumably this lets streaming start without initial text and
 * receive it later through `streamInsert` - confirm against the implementation.
 * @category Types
 * @property {string} [text] - a text to be spoken
 * @property {number} [speed] - optional speed argument - the higher it is, the faster the speech becomes
 */
export interface TextToSpeechInput {
  text?: string;
  speed?: number;
}

/**
 * Text to Speech module input for pre-computed phonemes.
 *
 * Use this when you have your own phonemizer (e.g. the Python `phonemizer`
 * library, espeak-ng, or any custom G2P system) and want to bypass the
 * built-in phonemizer pipeline. Unlike `TextToSpeechInput.text`, the
 * `phonemes` field is required.
 * @category Types
 * @property {string} phonemes - pre-computed IPA phoneme string
 * @property {number} [speed] - optional speed argument - the higher it is, the faster the speech becomes
 */
export interface TextToSpeechPhonemeInput {
  phonemes: string;
  speed?: number;
}

/**
 * Return type for the `useTextToSpeech` hook.
 * Exposes the state and operations for Text-to-Speech generation:
 * status fields, single-pass synthesis, and streaming synthesis controls.
 * @category Types
 */
export interface TextToSpeechType {
  /**
   * Contains the error object if the model failed to load or encountered an error during inference;
   * `null` otherwise.
   */
  error: RnExecutorchError | null;

  /**
   * Indicates whether the Text-to-Speech model is loaded and ready to accept inputs.
   */
  isReady: boolean;

  /**
   * Indicates whether the model is currently generating audio.
   */
  isGenerating: boolean;

  /**
   * Represents the download progress of the model and voice assets as a value between 0 and 1.
   */
  downloadProgress: number;

  /**
   * Runs the model to convert the provided text into speech audio in a single pass.
   * @param input - The `TextToSpeechInput` object containing the `text` to synthesize and optional `speed`.
   * @returns A Promise that resolves with the generated audio data as a `Float32Array`.
   * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
   */
  forward: (input: TextToSpeechInput) => Promise<Float32Array>;

  /**
   * Synthesizes pre-computed phonemes into speech audio in a single pass.
   * Bypasses the built-in phonemizer, allowing use of external G2P systems.
   * @param input - The `TextToSpeechPhonemeInput` object containing pre-computed `phonemes` and optional `speed`.
   * @returns A Promise that resolves with the generated audio data as a `Float32Array`.
   * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
   */
  forwardFromPhonemes: (
    input: TextToSpeechPhonemeInput
  ) => Promise<Float32Array>;

  /**
   * Streams the generated audio data incrementally.
   * This is optimal for real-time playback, allowing audio to start playing before the full text is synthesized.
   * @param input - The `TextToSpeechStreamingInput` object containing `text`, optional `speed`, and lifecycle callbacks (`onBegin`, `onNext`, `onEnd`).
   * @returns A Promise that resolves when the streaming process is complete.
   * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
   */
  stream: (input: TextToSpeechStreamingInput) => Promise<void>;

  /**
   * Streams pre-computed phonemes incrementally, bypassing the built-in phonemizer.
   * @param input - The `TextToSpeechStreamingPhonemeInput` object containing pre-computed `phonemes` (instead of `text`), optional `speed`, and lifecycle callbacks (`onBegin`, `onNext`, `onEnd`).
   * @returns A Promise that resolves when the streaming process is complete.
   * @throws {RnExecutorchError} If the model is not loaded or is currently generating.
   */
  streamFromPhonemes: (
    input: TextToSpeechStreamingPhonemeInput
  ) => Promise<void>;

  /**
   * Inserts a new text chunk into the buffer to be processed in streaming mode.
   * @param textChunk - The text chunk to add to the streaming buffer.
   */
  streamInsert: (textChunk: string) => void;

  /**
   * Interrupts and stops the currently active audio generation stream.
   * @param instant - If true, stops the streaming as soon as possible. Otherwise
   *                  allows the module to finish processing what remains in the buffer.
   */
  streamStop: (instant?: boolean) => void;
}

/**
 * Shared streaming lifecycle callbacks for TTS streaming modes.
 *
 * Every callback is optional and may return either `void` or a `Promise<void>`.
 * Mixed into both `TextToSpeechStreamingInput` and `TextToSpeechStreamingPhonemeInput`.
 * @category Types
 * @property {() => void | Promise<void>} [onBegin] - Called when streaming begins
 * @property {(audio: Float32Array) => void | Promise<void>} [onNext] - Called after each audio chunk gets calculated; receives that chunk.
 * @property {() => void | Promise<void>} [onEnd] - Called when streaming ends
 */
export interface TextToSpeechStreamingCallbacks {
  onBegin?: () => void | Promise<void>;
  onNext?: (audio: Float32Array) => void | Promise<void>;
  onEnd?: () => void | Promise<void>;
}

/**
 * Text to Speech streaming input definition.
 *
 * Combines the fields of `TextToSpeechInput` (`text`, `speed`) with the
 * lifecycle callbacks of `TextToSpeechStreamingCallbacks`.
 *
 * Streaming mode in T2S is synchronized by passing specific callbacks
 * executed at given moments of the streaming.
 * Actions such as playing the audio should happen within the onNext callback.
 * Callbacks can be either synchronous or asynchronous.
 *
 * Enables an incrementally expanded input, in other words adding
 * new text chunks with streamInsert() while the streaming is running.
 * @category Types
 * @property {boolean} [stopAutomatically] - If true, streaming will stop automatically when the buffer is empty.
 */
export interface TextToSpeechStreamingInput
  extends TextToSpeechInput, TextToSpeechStreamingCallbacks {
  stopAutomatically?: boolean;
}

/**
 * Streaming input definition for pre-computed phonemes.
 *
 * Combines `TextToSpeechPhonemeInput` (`phonemes`, `speed`) with the lifecycle
 * callbacks of `TextToSpeechStreamingCallbacks`; it declares no members of its own.
 * Similar to `TextToSpeechStreamingInput`, but accepts `phonemes` instead of `text`
 * and has no `stopAutomatically` option.
 * @category Types
 */
export interface TextToSpeechStreamingPhonemeInput
  extends TextToSpeechPhonemeInput, TextToSpeechStreamingCallbacks {}