syntax = "proto3";

package smartspeech.synthesis.v1;

import "task.proto";

import "google/protobuf/duration.proto";

option go_package = "./;protocol";
option java_package = "TODO";

import "google/api/client.proto";
// Speech synthesis API: one server-streaming RPC and one task-returning RPC.
service SmartSpeech {
  // Default host name advertised for this service.
  option (google.api.default_host) = "sber.ru";

  // Server-streaming call: accepts a single SynthesisRequest and answers with
  // a stream of SynthesisResponse messages.
  rpc Synthesize(SynthesisRequest) returns (stream SynthesisResponse);

  // Unary call: accepts an AsyncSynthesisRequest and answers with a
  // smartspeech.task.v1.Task, a type that is not declared in this file.
  // NOTE(review): presumably the Task is used to track the submitted job;
  // confirm against task.proto.
  rpc AsyncSynthesize(AsyncSynthesisRequest)
      returns (smartspeech.task.v1.Task);
}

// Request message of SmartSpeech.Synthesize.
message SynthesisRequest {
  // Audio formats that can be selected through `audio_encoding`. The enum is
  // also referenced by AsyncSynthesisRequest.
  enum AudioEncoding {
    // Zero value; this is what a receiver observes when the field is not set.
    AUDIO_ENCODING_UNSPECIFIED = 0;

    // 16-bit signed little-endian (Linear PCM).
    PCM_S16LE = 1;

    // MIME audio/ogg; codecs=opus.
    OPUS = 2;

    // MIME audio/x-wav with 16-bit signed little-endian (Linear PCM).
    WAV = 3;
  }

  // Kinds of content that can be selected through `content_type`.
  enum ContentType {
    // Zero value, therefore also what a receiver observes when `content_type`
    // is not set.
    TEXT = 0;

    // NOTE(review): presumably Speech Synthesis Markup Language; confirm.
    SSML = 1;
  }

  // NOTE(review): presumably the content to synthesize, interpreted according
  // to `content_type`; confirm against the server implementation.
  string text = 1;

  // Audio format selector; see AudioEncoding.
  AudioEncoding audio_encoding = 2;

  // Language code in RFC-3066 format, e.g. ru-RU.
  string language = 3;

  // Content kind selector; see ContentType.
  ContentType content_type = 4;

  // NOTE(review): presumably a voice identifier; the accepted values are not
  // defined in this file.
  string voice = 5;

  // NOTE(review): presumably asks the server to regenerate a cached result
  // instead of reusing it; confirm against the server implementation.
  bool rebuild_cache = 6;
}

// One element of the response stream returned by SmartSpeech.Synthesize.
message SynthesisResponse {
  // Chunk of audio data.
  bytes data = 1;

  // Time from start of audio so far.
  google.protobuf.Duration audio_duration = 2;
}

// Request message of SmartSpeech.AsyncSynthesize.
message AsyncSynthesisRequest {
  // NOTE(review): presumably identifies a previously uploaded file holding the
  // content to synthesize; how the id is obtained is not shown in this file.
  string request_file_id = 1;

  // Audio format selector; reuses the enum nested in SynthesisRequest.
  SynthesisRequest.AudioEncoding audio_encoding = 2;

  // NOTE(review): presumably a voice identifier; the accepted values are not
  // defined in this file.
  string voice = 3;
}
