{
  "$schema": "http://json-schema.org/draft-07/schema#",
  "$ref": "#/definitions/APIOptions",
  "definitions": {
    "APIOptions": {
      "type": "object",
      "properties": {
        "SynthesisOptions": {
          "$ref": "#/definitions/SynthesisOptions"
        },
        "VoiceListRequestOptions": {
          "$ref": "#/definitions/VoiceListRequestOptions"
        },
        "RecognitionOptions": {
          "$ref": "#/definitions/RecognitionOptions"
        },
        "AlignmentOptions": {
          "$ref": "#/definitions/AlignmentOptions"
        },
        "TranslationAlignmentOptions": {
          "$ref": "#/definitions/TranslationAlignmentOptions"
        },
        "TranscriptAndTranslationAlignmentOptions": {
          "$ref": "#/definitions/TranscriptAndTranslationAlignmentOptions"
        },
        "TimelineTranslationAlignmentOptions": {
          "$ref": "#/definitions/TimelineTranslationAlignmentOptions"
        },
        "SpeechTranslationOptions": {
          "$ref": "#/definitions/SpeechTranslationOptions"
        },
        "TextTranslationOptions": {
          "$ref": "#/definitions/TextTranslationOptions"
        },
        "SpeechLanguageDetectionOptions": {
          "$ref": "#/definitions/SpeechLanguageDetectionOptions"
        },
        "TextLanguageDetectionOptions": {
          "$ref": "#/definitions/TextLanguageDetectionOptions"
        },
        "VADOptions": {
          "$ref": "#/definitions/VADOptions"
        },
        "DenoisingOptions": {
          "$ref": "#/definitions/DenoisingOptions"
        },
        "SourceSeparationOptions": {
          "$ref": "#/definitions/SourceSeparationOptions"
        },
        "ServerOptions": {
          "$ref": "#/definitions/ServerOptions"
        },
        "GlobalOptions": {
          "$ref": "#/definitions/GlobalOptions"
        },
        "CLIOptions": {
          "$ref": "#/definitions/CLIOptions"
        }
      },
      "required": [
        "SynthesisOptions",
        "VoiceListRequestOptions",
        "RecognitionOptions",
        "AlignmentOptions",
        "TranslationAlignmentOptions",
        "TranscriptAndTranslationAlignmentOptions",
        "TimelineTranslationAlignmentOptions",
        "SpeechTranslationOptions",
        "TextTranslationOptions",
        "SpeechLanguageDetectionOptions",
        "TextLanguageDetectionOptions",
        "VADOptions",
        "DenoisingOptions",
        "SourceSeparationOptions",
        "ServerOptions",
        "GlobalOptions",
        "CLIOptions"
      ],
      "additionalProperties": false
    },
    "SynthesisOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/SynthesisEngine"
        },
        "language": {
          "type": "string"
        },
        "voice": {
          "type": "string"
        },
        "voiceGender": {
          "$ref": "#/definitions/VoiceGender"
        },
        "speed": {
          "type": "number"
        },
        "pitch": {
          "type": "number"
        },
        "pitchVariation": {
          "type": "number"
        },
        "splitToSentences": {
          "type": "boolean"
        },
        "ssml": {
          "type": "boolean"
        },
        "segmentEndPause": {
          "type": "number"
        },
        "sentenceEndPause": {
          "type": "number"
        },
        "customLexiconPaths": {
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "plainText": {
          "$ref": "#/definitions/PlainTextOptions"
        },
        "alignment": {
          "$ref": "#/definitions/AlignmentOptions"
        },
        "postProcessing": {
          "type": "object",
          "properties": {
            "normalizeAudio": {
              "type": "boolean"
            },
            "targetPeak": {
              "type": "number"
            },
            "maxGainIncrease": {
              "type": "number"
            },
            "speed": {
              "type": "number"
            },
            "pitch": {
              "type": "number"
            },
            "timePitchShiftingMethod": {
              "$ref": "#/definitions/TimePitchShiftingMethod"
            },
            "rubberband": {
              "$ref": "#/definitions/RubberbandOptions"
            }
          },
          "additionalProperties": false
        },
        "outputAudioFormat": {
          "type": "object",
          "properties": {
            "codec": {
              "type": "string",
              "enum": [
                "wav",
                "mp3",
                "opus",
                "m4a",
                "ogg",
                "flac"
              ]
            },
            "bitrate": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "languageDetection": {
          "$ref": "#/definitions/TextLanguageDetectionOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "vits": {
          "type": "object",
          "properties": {
            "speakerId": {
              "type": "number"
            },
            "provider": {
              "$ref": "#/definitions/OnnxExecutionProvider"
            }
          },
          "additionalProperties": false
        },
        "kokoro": {
          "type": "object",
          "properties": {
            "provider": {
              "$ref": "#/definitions/OnnxExecutionProvider"
            },
            "model": {
              "type": "string",
              "enum": [
                "82m-v1.0-fp32",
                "82m-v1.0-quantized"
              ]
            }
          },
          "additionalProperties": false
        },
        "pico": {
          "type": "object",
          "additionalProperties": false
        },
        "flite": {
          "type": "object",
          "additionalProperties": false
        },
        "gnuspeech": {
          "type": "object",
          "properties": {
            "tempo": {
              "type": "number"
            },
            "controlRate": {
              "type": "number"
            },
            "debug": {
              "type": "boolean"
            }
          },
          "additionalProperties": false
        },
        "espeak": {
          "type": "object",
          "properties": {
            "rate": {
              "type": "number"
            },
            "pitch": {
              "type": "number"
            },
            "pitchRange": {
              "type": "number"
            },
            "useKlatt": {
              "type": "boolean"
            },
            "insertSeparators": {
              "type": "boolean"
            }
          },
          "additionalProperties": false
        },
        "sam": {
          "type": "object",
          "properties": {
            "pitch": {
              "type": "number"
            },
            "speed": {
              "type": "number"
            },
            "mouth": {
              "type": "number"
            },
            "throat": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "sapi": {
          "type": "object",
          "properties": {
            "rate": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "msspeech": {
          "type": "object",
          "properties": {
            "rate": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "coquiServer": {
          "type": "object",
          "properties": {
            "serverUrl": {
              "type": "string"
            },
            "speakerId": {
              "type": [
                "string",
                "null"
              ]
            }
          },
          "additionalProperties": false
        },
        "googleCloud": {
          "type": "object",
          "properties": {
            "apiKey": {
              "type": "string"
            },
            "pitchDeltaSemitones": {
              "type": "number"
            },
            "customVoice": {
              "type": "object",
              "properties": {
                "model": {
                  "type": "string"
                },
                "reportedUsage": {
                  "type": "string"
                }
              },
              "additionalProperties": false
            }
          },
          "additionalProperties": false
        },
        "microsoftAzure": {
          "type": "object",
          "properties": {
            "subscriptionKey": {
              "type": "string"
            },
            "serviceRegion": {
              "type": "string"
            },
            "pitchDeltaHz": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "amazonPolly": {
          "type": "object",
          "properties": {
            "region": {
              "type": "string"
            },
            "accessKeyId": {
              "type": "string"
            },
            "secretAccessKey": {
              "type": "string"
            },
            "pollyEngine": {
              "type": "string",
              "enum": [
                "standard",
                "neural"
              ]
            },
            "lexiconNames": {
              "type": "array",
              "items": {
                "type": "string"
              }
            }
          },
          "additionalProperties": false
        },
        "openAICloud": {
          "$ref": "#/definitions/OpenAICloudTTSOptions"
        },
        "elevenLabs": {
          "$ref": "#/definitions/ElevenLabsTTSOptions"
        },
        "deepgram": {
          "$ref": "#/definitions/DeepgramTTSOptions"
        },
        "googleTranslate": {
          "type": "object",
          "properties": {
            "tld": {
              "type": "string"
            }
          },
          "additionalProperties": false
        },
        "microsoftEdge": {
          "type": "object",
          "properties": {
            "trustedClientToken": {
              "type": "string"
            },
            "pitchDeltaHz": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "streamlabsPolly": {
          "type": "object",
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "SynthesisEngine": {
      "type": "string",
      "enum": [
        "vits",
        "kokoro",
        "pico",
        "flite",
        "gnuspeech",
        "espeak",
        "sam",
        "sapi",
        "msspeech",
        "coqui-server",
        "google-cloud",
        "microsoft-azure",
        "amazon-polly",
        "openai-cloud",
        "elevenlabs",
        "deepgram",
        "google-translate",
        "microsoft-edge",
        "streamlabs-polly"
      ]
    },
    "VoiceGender": {
      "type": "string",
      "enum": [
        "male",
        "female",
        "unknown"
      ]
    },
    "PlainTextOptions": {
      "type": "object",
      "properties": {
        "paragraphBreaks": {
          "$ref": "#/definitions/ParagraphBreakType"
        },
        "whitespace": {
          "$ref": "#/definitions/WhitespaceProcessing"
        }
      },
      "additionalProperties": false
    },
    "ParagraphBreakType": {
      "type": "string",
      "enum": [
        "single",
        "double"
      ]
    },
    "WhitespaceProcessing": {
      "type": "string",
      "enum": [
        "preserve",
        "removeLineBreaks",
        "collapse"
      ]
    },
    "AlignmentOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/AlignmentEngine"
        },
        "language": {
          "type": "string"
        },
        "isolate": {
          "type": "boolean"
        },
        "crop": {
          "type": "boolean"
        },
        "customLexiconPaths": {
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "languageDetection": {
          "$ref": "#/definitions/TextLanguageDetectionOptions"
        },
        "vad": {
          "$ref": "#/definitions/VADOptions"
        },
        "plainText": {
          "$ref": "#/definitions/PlainTextOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "dtw": {
          "type": "object",
          "properties": {
            "granularity": {
              "anyOf": [
                {
                  "$ref": "#/definitions/DtwGranularity"
                },
                {
                  "type": "array",
                  "items": {
                    "$ref": "#/definitions/DtwGranularity"
                  }
                }
              ]
            },
            "windowDuration": {
              "anyOf": [
                {
                  "type": "number"
                },
                {
                  "type": "string"
                },
                {
                  "type": "array",
                  "items": {
                    "type": [
                      "string",
                      "number"
                    ]
                  }
                }
              ]
            },
            "phoneAlignmentMethod": {
              "$ref": "#/definitions/PhoneAlignmentMethod"
            }
          },
          "additionalProperties": false
        },
        "recognition": {
          "$ref": "#/definitions/RecognitionOptions"
        },
        "sourceSeparation": {
          "$ref": "#/definitions/SourceSeparationOptions"
        },
        "whisper": {
          "$ref": "#/definitions/WhisperAlignmentOptions"
        }
      },
      "additionalProperties": false
    },
    "AlignmentEngine": {
      "type": "string",
      "enum": [
        "dtw",
        "dtw-ra",
        "dtw-ea",
        "whisper"
      ]
    },
    "TextLanguageDetectionOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/TextLanguageDetectionEngine"
        },
        "defaultLanguage": {
          "type": "string"
        },
        "fallbackThresholdProbability": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "TextLanguageDetectionEngine": {
      "type": "string",
      "enum": [
        "tinyld",
        "fasttext"
      ]
    },
    "VADOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/VADEngine"
        },
        "activityThreshold": {
          "type": "number"
        },
        "webrtc": {
          "type": "object",
          "properties": {
            "frameDuration": {
              "type": "number",
              "enum": [
                10,
                20,
                30
              ]
            },
            "mode": {
              "type": "number",
              "enum": [
                0,
                1,
                2,
                3
              ]
            }
          },
          "additionalProperties": false
        },
        "silero": {
          "type": "object",
          "properties": {
            "frameDuration": {
              "type": "number",
              "enum": [
                30,
                60,
                90
              ]
            },
            "provider": {
              "$ref": "#/definitions/OnnxExecutionProvider"
            }
          },
          "additionalProperties": false
        },
        "rnnoise": {
          "type": "object",
          "additionalProperties": false
        },
        "whisper": {
          "$ref": "#/definitions/WhisperVADOptions"
        },
        "adaptiveGate": {
          "$ref": "#/definitions/AdaptiveGateVADOptions"
        }
      },
      "additionalProperties": false
    },
    "VADEngine": {
      "type": "string",
      "enum": [
        "webrtc",
        "silero",
        "rnnoise",
        "whisper",
        "adaptive-gate"
      ]
    },
    "OnnxExecutionProvider": {
      "type": "string",
      "enum": [
        "cpu",
        "dml",
        "cuda"
      ]
    },
    "WhisperVADOptions": {
      "type": "object",
      "properties": {
        "model": {
          "$ref": "#/definitions/WhisperModelName"
        },
        "temperature": {
          "type": "number"
        },
        "encoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        },
        "decoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        }
      },
      "additionalProperties": false
    },
    "WhisperModelName": {
      "type": "string",
      "enum": [
        "tiny",
        "tiny.en",
        "base",
        "base.en",
        "small",
        "small.en",
        "medium",
        "medium.en",
        "large-v1",
        "large-v2",
        "large-v3",
        "large-v3-turbo"
      ]
    },
    "AdaptiveGateVADOptions": {
      "type": "object",
      "properties": {
        "lowCutoff": {
          "type": "number"
        },
        "highCutoff": {
          "type": "number"
        },
        "positiveAdaptationRate": {
          "type": "number"
        },
        "negativeAdaptationRate": {
          "type": "number"
        },
        "peakLoudnessDecay": {
          "type": "number"
        },
        "backwardExtensionDuration": {
          "type": "number"
        },
        "relativeThreshold": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "SubtitlesConfig": {
      "type": "object",
      "properties": {
        "format": {
          "type": "string",
          "enum": [
            "srt",
            "webvtt"
          ]
        },
        "language": {
          "type": "string"
        },
        "mode": {
          "$ref": "#/definitions/SubtitlesMode"
        },
        "maxLineCount": {
          "type": "number"
        },
        "maxLineWidth": {
          "type": "number"
        },
        "minWordsInLine": {
          "type": "number"
        },
        "separatePhrases": {
          "type": "boolean"
        },
        "maxAddedDuration": {
          "type": "number"
        },
        "decimalSeparator": {
          "type": "string",
          "enum": [
            ",",
            "."
          ]
        },
        "includeCueIndexes": {
          "type": "boolean"
        },
        "includeHours": {
          "type": "boolean"
        },
        "lineBreakString": {
          "type": "string",
          "enum": [
            "\n",
            "\r\n"
          ]
        },
        "originalText": {
          "type": "string"
        },
        "totalDuration": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "SubtitlesMode": {
      "type": "string",
      "enum": [
        "line",
        "segment",
        "sentence",
        "word",
        "phone",
        "word+phone"
      ]
    },
    "DtwGranularity": {
      "type": "string",
      "enum": [
        "xx-low",
        "x-low",
        "low",
        "medium",
        "high",
        "x-high"
      ]
    },
    "PhoneAlignmentMethod": {
      "type": "string",
      "enum": [
        "interpolation",
        "dtw"
      ]
    },
    "RecognitionOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/RecognitionEngine"
        },
        "language": {
          "type": "string"
        },
        "maxAlternatives": {
          "type": "number"
        },
        "isolate": {
          "type": "boolean"
        },
        "crop": {
          "type": "boolean"
        },
        "alignment": {
          "$ref": "#/definitions/AlignmentOptions"
        },
        "languageDetection": {
          "$ref": "#/definitions/SpeechLanguageDetectionOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "vad": {
          "$ref": "#/definitions/VADOptions"
        },
        "sourceSeparation": {
          "$ref": "#/definitions/SourceSeparationOptions"
        },
        "whisper": {
          "$ref": "#/definitions/WhisperOptions"
        },
        "whisperCpp": {
          "$ref": "#/definitions/WhisperCppOptions"
        },
        "vosk": {
          "type": "object",
          "properties": {
            "modelPath": {
              "type": "string"
            }
          },
          "additionalProperties": false
        },
        "silero": {
          "$ref": "#/definitions/SileroRecognitionOptions"
        },
        "googleCloud": {
          "type": "object",
          "properties": {
            "apiKey": {
              "type": "string"
            },
            "alternativeLanguageCodes": {
              "type": "array",
              "items": {
                "type": "string"
              }
            },
            "profanityFilter": {
              "type": "boolean"
            },
            "autoPunctuation": {
              "type": "boolean"
            },
            "useEnhancedModel": {
              "type": "boolean"
            }
          },
          "additionalProperties": false
        },
        "microsoftAzure": {
          "type": "object",
          "properties": {
            "subscriptionKey": {
              "type": "string"
            },
            "serviceRegion": {
              "type": "string"
            }
          },
          "additionalProperties": false
        },
        "amazonTranscribe": {
          "type": "object",
          "properties": {
            "region": {
              "type": "string"
            },
            "accessKeyId": {
              "type": "string"
            },
            "secretAccessKey": {
              "type": "string"
            }
          },
          "additionalProperties": false
        },
        "openAICloud": {
          "$ref": "#/definitions/OpenAICloudSTTOptions"
        },
        "deepgram": {
          "$ref": "#/definitions/DeepgramSTTOptions"
        }
      },
      "additionalProperties": false
    },
    "RecognitionEngine": {
      "type": "string",
      "enum": [
        "whisper",
        "whisper.cpp",
        "vosk",
        "silero",
        "google-cloud",
        "microsoft-azure",
        "amazon-transcribe",
        "openai-cloud",
        "deepgram"
      ]
    },
    "SpeechLanguageDetectionOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/SpeechLanguageDetectionEngine"
        },
        "defaultLanguage": {
          "type": "string"
        },
        "fallbackThresholdProbability": {
          "type": "number"
        },
        "crop": {
          "type": "boolean"
        },
        "silero": {
          "$ref": "#/definitions/SileroLanguageDetectionOptions"
        },
        "whisper": {
          "$ref": "#/definitions/WhisperLanguageDetectionOptions"
        },
        "whisperCpp": {
          "$ref": "#/definitions/WhisperCppOptions"
        },
        "vad": {
          "$ref": "#/definitions/VADOptions"
        }
      },
      "additionalProperties": false
    },
    "SpeechLanguageDetectionEngine": {
      "type": "string",
      "enum": [
        "silero",
        "whisper",
        "whisper.cpp"
      ]
    },
    "SileroLanguageDetectionOptions": {
      "type": "object",
      "properties": {
        "provider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        }
      },
      "additionalProperties": false
    },
    "WhisperLanguageDetectionOptions": {
      "type": "object",
      "properties": {
        "model": {
          "$ref": "#/definitions/WhisperModelName"
        },
        "temperature": {
          "type": "number"
        },
        "encoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        },
        "decoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        }
      },
      "additionalProperties": false
    },
    "WhisperCppOptions": {
      "type": "object",
      "properties": {
        "build": {
          "$ref": "#/definitions/WhisperCppBuild"
        },
        "executablePath": {
          "type": "string"
        },
        "enableGPU": {
          "type": "boolean"
        },
        "model": {
          "$ref": "#/definitions/WhisperCppModelId"
        },
        "threadCount": {
          "type": "number"
        },
        "splitCount": {
          "type": "number"
        },
        "topCandidateCount": {
          "type": "number"
        },
        "beamCount": {
          "type": "number"
        },
        "repetitionThreshold": {
          "type": "number"
        },
        "temperature": {
          "type": "number"
        },
        "temperatureIncrement": {
          "type": "number"
        },
        "prompt": {
          "type": "string"
        },
        "enableDTW": {
          "type": "boolean"
        },
        "enableFlashAttention": {
          "type": "boolean"
        },
        "verbose": {
          "type": "boolean"
        }
      },
      "additionalProperties": false
    },
    "WhisperCppBuild": {
      "type": "string",
      "enum": [
        "cpu",
        "cublas-12.4.0",
        "custom"
      ]
    },
    "WhisperCppModelId": {
      "type": "string",
      "enum": [
        "tiny",
        "tiny-q5_1",
        "tiny.en",
        "tiny.en-q5_1",
        "tiny.en-q8_0",
        "base",
        "base-q5_1",
        "base.en",
        "base.en-q5_1",
        "small",
        "small-q5_1",
        "small.en",
        "small.en-q5_1",
        "medium",
        "medium-q5_0",
        "medium.en",
        "medium.en-q5_0",
        "large",
        "large-v1",
        "large-v2",
        "large-v2-q5_0",
        "large-v3",
        "large-v3-q5_0",
        "large-v3-turbo",
        "large-v3-turbo-q5_0"
      ]
    },
    "SourceSeparationOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/SourceSeparationEngine"
        },
        "mdxNet": {
          "$ref": "#/definitions/MDXNetOptions"
        }
      },
      "additionalProperties": false
    },
    "SourceSeparationEngine": {
      "type": "string",
      "const": "mdx-net"
    },
    "MDXNetOptions": {
      "type": "object",
      "properties": {
        "model": {
          "$ref": "#/definitions/MDXNetModelName"
        },
        "provider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        }
      },
      "additionalProperties": false
    },
    "MDXNetModelName": {
      "type": "string",
      "enum": [
        "UVR_MDXNET_1_9703",
        "UVR_MDXNET_2_9682",
        "UVR_MDXNET_3_9662",
        "UVR_MDXNET_KARA",
        "UVR_MDXNET_Main",
        "Kim_Vocal_1",
        "Kim_Vocal_2"
      ]
    },
    "WhisperOptions": {
      "type": "object",
      "properties": {
        "model": {
          "$ref": "#/definitions/WhisperModelName"
        },
        "temperature": {
          "type": "number"
        },
        "prompt": {
          "type": "string"
        },
        "topCandidateCount": {
          "type": "number"
        },
        "punctuationThreshold": {
          "type": "number"
        },
        "autoPromptParts": {
          "type": "boolean"
        },
        "maxTokensPerPart": {
          "type": "number"
        },
        "suppressRepetition": {
          "type": "boolean"
        },
        "repetitionThreshold": {
          "type": "number"
        },
        "decodeTimestampTokens": {
          "type": "boolean"
        },
        "endTokenThreshold": {
          "type": "number"
        },
        "includeEndTokenInCandidates": {
          "type": "boolean"
        },
        "timestampAccuracy": {
          "type": "string",
          "enum": [
            "medium",
            "high"
          ]
        },
        "encoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        },
        "decoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        },
        "seed": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "SileroRecognitionOptions": {
      "type": "object",
      "properties": {
        "modelPath": {
          "type": "string"
        },
        "provider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        }
      },
      "additionalProperties": false
    },
    "OpenAICloudSTTOptions": {
      "type": "object",
      "properties": {
        "model": {
          "type": "string"
        },
        "apiKey": {
          "type": "string"
        },
        "organization": {
          "type": "string"
        },
        "baseURL": {
          "type": "string"
        },
        "temperature": {
          "type": "number"
        },
        "prompt": {
          "type": "string"
        },
        "timeout": {
          "type": "number"
        },
        "maxRetries": {
          "type": "number"
        },
        "requestWordTimestamps": {
          "type": "boolean"
        }
      },
      "additionalProperties": false
    },
    "DeepgramSTTOptions": {
      "type": "object",
      "properties": {
        "apiKey": {
          "type": "string"
        },
        "model": {
          "type": "string"
        },
        "punctuate": {
          "type": "boolean"
        }
      },
      "additionalProperties": false
    },
    "WhisperAlignmentOptions": {
      "type": "object",
      "properties": {
        "model": {
          "$ref": "#/definitions/WhisperModelName"
        },
        "endTokenThreshold": {
          "type": "number"
        },
        "maxTokensPerPart": {
          "type": "number"
        },
        "timestampAccuracy": {
          "type": "string",
          "enum": [
            "medium",
            "high"
          ]
        },
        "encoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        },
        "decoderProvider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        }
      },
      "additionalProperties": false
    },
    "TimePitchShiftingMethod": {
      "type": "string",
      "enum": [
        "sonic",
        "rubberband"
      ]
    },
    "RubberbandOptions": {
      "type": "object",
      "properties": {
        "stretch": {
          "type": "string",
          "enum": [
            "elastic",
            "precise"
          ]
        },
        "transients": {
          "type": "string",
          "enum": [
            "crisp",
            "mixed",
            "smooth"
          ]
        },
        "detector": {
          "type": "string",
          "enum": [
            "compound",
            "percussive",
            "soft"
          ]
        },
        "phase": {
          "type": "string",
          "enum": [
            "laminar",
            "independent"
          ]
        },
        "window": {
          "type": "string",
          "enum": [
            "standard",
            "long",
            "short"
          ]
        },
        "smoothing": {
          "type": "string",
          "enum": [
            "off",
            "on"
          ]
        },
        "formant": {
          "type": "string",
          "enum": [
            "shifted",
            "preserved"
          ]
        },
        "pitch": {
          "type": "string",
          "enum": [
            "high-speed",
            "high-quality",
            "high-consistency"
          ]
        },
        "channels": {
          "type": "string",
          "enum": [
            "apart",
            "together"
          ]
        },
        "engine": {
          "type": "string",
          "enum": [
            "faster",
            "finer"
          ]
        }
      },
      "additionalProperties": false
    },
    "OpenAICloudTTSOptions": {
      "type": "object",
      "properties": {
        "apiKey": {
          "type": "string"
        },
        "organization": {
          "type": "string"
        },
        "baseURL": {
          "type": "string"
        },
        "model": {
          "type": "string",
          "enum": [
            "tts-1",
            "tts-1-hd",
            "gpt-4o-mini-tts"
          ]
        },
        "instructions": {
          "type": "string"
        },
        "timeout": {
          "type": "number"
        },
        "maxRetries": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "ElevenLabsTTSOptions": {
      "type": "object",
      "properties": {
        "apiKey": {
          "type": "string"
        },
        "modelId": {
          "type": "string"
        },
        "stability": {
          "type": "number"
        },
        "similarityBoost": {
          "type": "number"
        },
        "style": {
          "type": "number"
        },
        "useSpeakerBoost": {
          "type": "boolean"
        },
        "seed": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "DeepgramTTSOptions": {
      "type": "object",
      "properties": {
        "apiKey": {
          "type": "string"
        }
      },
      "additionalProperties": false
    },
    "VoiceListRequestOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/SynthesisEngine"
        },
        "language": {
          "type": "string"
        },
        "voice": {
          "type": "string"
        },
        "voiceGender": {
          "$ref": "#/definitions/VoiceGender"
        },
        "speed": {
          "type": "number"
        },
        "pitch": {
          "type": "number"
        },
        "pitchVariation": {
          "type": "number"
        },
        "splitToSentences": {
          "type": "boolean"
        },
        "ssml": {
          "type": "boolean"
        },
        "segmentEndPause": {
          "type": "number"
        },
        "sentenceEndPause": {
          "type": "number"
        },
        "customLexiconPaths": {
          "type": "array",
          "items": {
            "type": "string"
          }
        },
        "plainText": {
          "$ref": "#/definitions/PlainTextOptions"
        },
        "alignment": {
          "$ref": "#/definitions/AlignmentOptions"
        },
        "postProcessing": {
          "type": "object",
          "properties": {
            "normalizeAudio": {
              "type": "boolean"
            },
            "targetPeak": {
              "type": "number"
            },
            "maxGainIncrease": {
              "type": "number"
            },
            "speed": {
              "type": "number"
            },
            "pitch": {
              "type": "number"
            },
            "timePitchShiftingMethod": {
              "$ref": "#/definitions/TimePitchShiftingMethod"
            },
            "rubberband": {
              "$ref": "#/definitions/RubberbandOptions"
            }
          },
          "additionalProperties": false
        },
        "outputAudioFormat": {
          "type": "object",
          "properties": {
            "codec": {
              "type": "string",
              "enum": [
                "wav",
                "mp3",
                "opus",
                "m4a",
                "ogg",
                "flac"
              ]
            },
            "bitrate": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "languageDetection": {
          "$ref": "#/definitions/TextLanguageDetectionOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "vits": {
          "type": "object",
          "properties": {
            "speakerId": {
              "type": "number"
            },
            "provider": {
              "$ref": "#/definitions/OnnxExecutionProvider"
            }
          },
          "additionalProperties": false
        },
        "kokoro": {
          "type": "object",
          "properties": {
            "provider": {
              "$ref": "#/definitions/OnnxExecutionProvider"
            },
            "model": {
              "type": "string",
              "enum": [
                "82m-v1.0-fp32",
                "82m-v1.0-quantized"
              ]
            }
          },
          "additionalProperties": false
        },
        "pico": {
          "type": "object",
          "additionalProperties": false
        },
        "flite": {
          "type": "object",
          "additionalProperties": false
        },
        "gnuspeech": {
          "type": "object",
          "properties": {
            "tempo": {
              "type": "number"
            },
            "controlRate": {
              "type": "number"
            },
            "debug": {
              "type": "boolean"
            }
          },
          "additionalProperties": false
        },
        "espeak": {
          "type": "object",
          "properties": {
            "rate": {
              "type": "number"
            },
            "pitch": {
              "type": "number"
            },
            "pitchRange": {
              "type": "number"
            },
            "useKlatt": {
              "type": "boolean"
            },
            "insertSeparators": {
              "type": "boolean"
            }
          },
          "additionalProperties": false
        },
        "sam": {
          "type": "object",
          "properties": {
            "pitch": {
              "type": "number"
            },
            "speed": {
              "type": "number"
            },
            "mouth": {
              "type": "number"
            },
            "throat": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "sapi": {
          "type": "object",
          "properties": {
            "rate": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "msspeech": {
          "type": "object",
          "properties": {
            "rate": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "coquiServer": {
          "type": "object",
          "properties": {
            "serverUrl": {
              "type": "string"
            },
            "speakerId": {
              "type": [
                "string",
                "null"
              ]
            }
          },
          "additionalProperties": false
        },
        "googleCloud": {
          "type": "object",
          "properties": {
            "apiKey": {
              "type": "string"
            },
            "pitchDeltaSemitones": {
              "type": "number"
            },
            "customVoice": {
              "type": "object",
              "properties": {
                "model": {
                  "type": "string"
                },
                "reportedUsage": {
                  "type": "string"
                }
              },
              "additionalProperties": false
            }
          },
          "additionalProperties": false
        },
        "microsoftAzure": {
          "type": "object",
          "properties": {
            "subscriptionKey": {
              "type": "string"
            },
            "serviceRegion": {
              "type": "string"
            },
            "pitchDeltaHz": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "amazonPolly": {
          "type": "object",
          "properties": {
            "region": {
              "type": "string"
            },
            "accessKeyId": {
              "type": "string"
            },
            "secretAccessKey": {
              "type": "string"
            },
            "pollyEngine": {
              "type": "string",
              "enum": [
                "standard",
                "neural"
              ]
            },
            "lexiconNames": {
              "type": "array",
              "items": {
                "type": "string"
              }
            }
          },
          "additionalProperties": false
        },
        "openAICloud": {
          "$ref": "#/definitions/OpenAICloudTTSOptions"
        },
        "elevenLabs": {
          "$ref": "#/definitions/ElevenLabsTTSOptions"
        },
        "deepgram": {
          "$ref": "#/definitions/DeepgramTTSOptions"
        },
        "googleTranslate": {
          "type": "object",
          "properties": {
            "tld": {
              "type": "string"
            }
          },
          "additionalProperties": false
        },
        "microsoftEdge": {
          "type": "object",
          "properties": {
            "trustedClientToken": {
              "type": "string"
            },
            "pitchDeltaHz": {
              "type": "number"
            }
          },
          "additionalProperties": false
        },
        "streamlabsPolly": {
          "type": "object",
          "additionalProperties": false
        },
        "cache": {
          "type": "object",
          "properties": {
            "path": {
              "type": "string"
            },
            "duration": {
              "type": "number"
            }
          },
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "TranslationAlignmentOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/TranslationAlignmentEngine"
        },
        "sourceLanguage": {
          "type": "string"
        },
        "targetLanguage": {
          "type": "string"
        },
        "isolate": {
          "type": "boolean"
        },
        "crop": {
          "type": "boolean"
        },
        "languageDetection": {
          "$ref": "#/definitions/SpeechLanguageDetectionOptions"
        },
        "vad": {
          "$ref": "#/definitions/VADOptions"
        },
        "plainText": {
          "$ref": "#/definitions/PlainTextOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "sourceSeparation": {
          "$ref": "#/definitions/SourceSeparationOptions"
        },
        "whisper": {
          "$ref": "#/definitions/WhisperAlignmentOptions"
        }
      },
      "additionalProperties": false
    },
    "TranslationAlignmentEngine": {
      "type": "string",
      "const": "whisper"
    },
    "TranscriptAndTranslationAlignmentOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/TranscriptAndTranslationAlignmentEngine"
        },
        "sourceLanguage": {
          "type": "string"
        },
        "targetLanguage": {
          "type": "string"
        },
        "isolate": {
          "type": "boolean"
        },
        "crop": {
          "type": "boolean"
        },
        "alignment": {
          "$ref": "#/definitions/AlignmentOptions"
        },
        "timelineAlignment": {
          "$ref": "#/definitions/TimelineTranslationAlignmentOptions"
        },
        "languageDetection": {
          "$ref": "#/definitions/TextLanguageDetectionOptions"
        },
        "vad": {
          "$ref": "#/definitions/VADOptions"
        },
        "plainText": {
          "$ref": "#/definitions/PlainTextOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "sourceSeparation": {
          "$ref": "#/definitions/SourceSeparationOptions"
        }
      },
      "additionalProperties": false
    },
    "TranscriptAndTranslationAlignmentEngine": {
      "type": "string",
      "const": "two-stage"
    },
    "TimelineTranslationAlignmentOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "type": "string",
          "const": "e5"
        },
        "sourceLanguage": {
          "type": "string"
        },
        "targetLanguage": {
          "type": "string"
        },
        "audio": {
          "$ref": "#/definitions/AudioSourceParam"
        },
        "languageDetection": {
          "$ref": "#/definitions/TextLanguageDetectionOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "e5": {
          "type": "object",
          "properties": {
            "model": {
              "type": "string",
              "const": "small-fp16"
            }
          },
          "required": [
            "model"
          ],
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "AudioSourceParam": {
      "anyOf": [
        {
          "type": "string"
        },
        {
          "type": "object",
          "properties": {
            "BYTES_PER_ELEMENT": {
              "type": "number"
            },
            "buffer": {
              "type": "object",
              "properties": {
                "byteLength": {
                  "type": "number"
                }
              },
              "required": [
                "byteLength"
              ],
              "additionalProperties": false
            },
            "byteLength": {
              "type": "number"
            },
            "byteOffset": {
              "type": "number"
            },
            "length": {
              "type": "number"
            }
          },
          "required": [
            "BYTES_PER_ELEMENT",
            "buffer",
            "byteLength",
            "byteOffset",
            "length"
          ],
          "additionalProperties": {
            "type": "number"
          }
        },
        {
          "$ref": "#/definitions/RawAudio"
        }
      ]
    },
    "RawAudio": {
      "type": "object",
      "properties": {
        "audioChannels": {
          "type": "array",
          "items": {
            "type": "object",
            "properties": {
              "BYTES_PER_ELEMENT": {
                "type": "number"
              },
              "buffer": {
                "type": "object",
                "properties": {
                  "byteLength": {
                    "type": "number"
                  }
                },
                "required": [
                  "byteLength"
                ],
                "additionalProperties": false
              },
              "byteLength": {
                "type": "number"
              },
              "byteOffset": {
                "type": "number"
              },
              "length": {
                "type": "number"
              }
            },
            "required": [
              "BYTES_PER_ELEMENT",
              "buffer",
              "byteLength",
              "byteOffset",
              "length"
            ],
            "additionalProperties": {
              "type": "number"
            }
          }
        },
        "sampleRate": {
          "type": "number"
        }
      },
      "required": [
        "audioChannels",
        "sampleRate"
      ],
      "additionalProperties": false
    },
    "SpeechTranslationOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/SpeechTranslationEngine"
        },
        "sourceLanguage": {
          "type": "string"
        },
        "targetLanguage": {
          "type": "string"
        },
        "crop": {
          "type": "boolean"
        },
        "isolate": {
          "type": "boolean"
        },
        "languageDetection": {
          "$ref": "#/definitions/SpeechLanguageDetectionOptions"
        },
        "subtitles": {
          "$ref": "#/definitions/SubtitlesConfig"
        },
        "vad": {
          "$ref": "#/definitions/VADOptions"
        },
        "sourceSeparation": {
          "$ref": "#/definitions/SourceSeparationOptions"
        },
        "whisper": {
          "$ref": "#/definitions/WhisperOptions"
        },
        "whisperCpp": {
          "$ref": "#/definitions/WhisperCppOptions"
        },
        "openAICloud": {
          "$ref": "#/definitions/OpenAICloudSTTOptions"
        }
      },
      "additionalProperties": false
    },
    "SpeechTranslationEngine": {
      "type": "string",
      "enum": [
        "whisper",
        "whisper.cpp",
        "openai-cloud"
      ]
    },
    "TextTranslationOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/TextTranslationEngine"
        },
        "sourceLanguage": {
          "type": "string"
        },
        "targetLanguage": {
          "type": "string"
        },
        "languageDetection": {
          "$ref": "#/definitions/TextLanguageDetectionOptions"
        },
        "plainText": {
          "$ref": "#/definitions/PlainTextOptions"
        },
        "nllb": {
          "type": "object",
          "additionalProperties": false
        },
        "googleTranslate": {
          "$ref": "#/definitions/GoogleTranslateTextTranslationOptions"
        },
        "deepl": {
          "type": "object",
          "additionalProperties": false
        }
      },
      "additionalProperties": false
    },
    "TextTranslationEngine": {
      "type": "string",
      "enum": [
        "nllb",
        "google-translate",
        "deepl"
      ]
    },
    "GoogleTranslateTextTranslationOptions": {
      "type": "object",
      "properties": {
        "tld": {
          "type": "string"
        },
        "maxCharactersPerPart": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "DenoisingOptions": {
      "type": "object",
      "properties": {
        "engine": {
          "$ref": "#/definitions/DenoisingEngine"
        },
        "postProcessing": {
          "type": "object",
          "properties": {
            "normalizeAudio": {
              "type": "boolean"
            },
            "targetPeak": {
              "type": "number"
            },
            "maxGainIncrease": {
              "type": "number"
            },
            "dryMixGain": {
              "type": "number"
            }
          },
          "required": [
            "normalizeAudio",
            "targetPeak",
            "maxGainIncrease"
          ],
          "additionalProperties": false
        },
        "nsnet2": {
          "$ref": "#/definitions/NSNet2Options"
        }
      },
      "additionalProperties": false
    },
    "DenoisingEngine": {
      "type": "string",
      "enum": [
        "rnnoise",
        "nsnet2"
      ]
    },
    "NSNet2Options": {
      "type": "object",
      "properties": {
        "model": {
          "$ref": "#/definitions/NSNet2ModelName"
        },
        "modelDirectoryPath": {
          "type": "string"
        },
        "provider": {
          "$ref": "#/definitions/OnnxExecutionProvider"
        },
        "maxAttenuation": {
          "type": "number"
        }
      },
      "additionalProperties": false
    },
    "NSNet2ModelName": {
      "type": "string",
      "enum": [
        "baseline-16khz",
        "baseline-48khz"
      ]
    },
    "ServerOptions": {
      "type": "object",
      "properties": {
        "port": {
          "type": "number"
        },
        "secure": {
          "type": "boolean"
        },
        "certPath": {
          "type": "string"
        },
        "keyPath": {
          "type": "string"
        },
        "deflate": {
          "type": "boolean"
        },
        "maxPayload": {
          "type": "number"
        },
        "useWorkerThread": {
          "type": "boolean"
        }
      },
      "additionalProperties": false
    },
    "GlobalOptions": {
      "type": "object",
      "properties": {
        "ffmpegPath": {
          "type": "string"
        },
        "soxPath": {
          "type": "string"
        },
        "packageBaseURL": {
          "type": "string"
        },
        "logLevel": {
          "$ref": "#/definitions/LogLevel"
        }
      },
      "additionalProperties": false
    },
    "LogLevel": {
      "type": "string",
      "enum": [
        "silent",
        "output",
        "error",
        "warning",
        "info",
        "trace"
      ]
    },
    "CLIOptions": {
      "type": "object",
      "properties": {
        "play": {
          "type": "boolean"
        },
        "player": {
          "$ref": "#/definitions/AudioPlayerID"
        },
        "overwrite": {
          "type": "boolean"
        },
        "debug": {
          "type": "boolean"
        },
        "config": {
          "type": "string"
        }
      },
      "additionalProperties": false
    },
    "AudioPlayerID": {
      "type": "string",
      "enum": [
        "audio-io",
        "sox"
      ]
    }
  }
}