// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta;

import "google/ai/generativelanguage/v1beta/citation.proto";
import "google/ai/generativelanguage/v1beta/content.proto";
import "google/ai/generativelanguage/v1beta/retriever.proto";
import "google/ai/generativelanguage/v1beta/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/duration.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "GenerativeServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta";

// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a model response given an input `GenerateContentRequest`.
  // Refer to the [text generation
  // guide](https://ai.google.dev/gemini-api/docs/text-generation) for detailed
  // usage information. Input capabilities differ between models, including
  // tuned models. Refer to the [model
  // guide](https://ai.google.dev/gemini-api/docs/models/gemini) and [tuning
  // guide](https://ai.google.dev/gemini-api/docs/model-tuning) for details.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta/{model=tunedModels/*}:generateContent"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta/{model=dynamic/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a grounded answer from the model given an input
  // `GenerateAnswerRequest`.
  rpc GenerateAnswer(GenerateAnswerRequest) returns (GenerateAnswerResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateAnswer"
      body: "*"
    };
    option (google.api.method_signature) =
        "model,contents,safety_settings,answer_style";
  }

  // Generates a [streamed
  // response](https://ai.google.dev/gemini-api/docs/text-generation?lang=python#generate-a-text-stream)
  // from the model given an input `GenerateContentRequest`.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:streamGenerateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta/{model=tunedModels/*}:streamGenerateContent"
        body: "*"
      }
      additional_bindings {
        post: "/v1beta/{model=dynamic/*}:streamGenerateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a text embedding vector from the input `Content` using the
  // specified [Gemini Embedding
  // model](https://ai.google.dev/gemini-api/docs/models/gemini#text-embedding).
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embedding vectors from the input `Content` which
  // consists of a batch of strings represented as `EmbedContentRequest`
  // objects.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input `Content` and returns the token count.
  // Refer to the [tokens guide](https://ai.google.dev/gemini-api/docs/tokens)
  // to learn more about tokens.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Low-latency bidirectional streaming API that supports audio and video
  // streaming inputs and can produce multimodal output streams (audio and
  // text).
  //
  // No HTTP binding is declared for this method.
  rpc BidiGenerateContent(stream BidiGenerateContentClientMessage)
      returns (stream BidiGenerateContentServerMessage) {}
}

// Type of task for which the embedding will be used.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for semantic textual similarity
  // (STS).
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;

  // Specifies that the given text will be used for question answering.
  QUESTION_ANSWERING = 6;

  // Specifies that the given text will be used for fact verification.
  FACT_VERIFICATION = 7;

  // Specifies that the given text will be used for code retrieval.
  CODE_RETRIEVAL_QUERY = 8;
}

// Request to generate a completion from the model.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `models/{model}`.
  //
  // The HTTP bindings of `GenerateContent` and `StreamGenerateContent` also
  // accept names matching `tunedModels/*` and `dynamic/*`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. Developer set [system
  // instruction(s)](https://ai.google.dev/gemini-api/docs/system-instructions).
  // Currently, text only.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries
  // like [chat](https://ai.google.dev/gemini-api/docs/text-generation#chat),
  // this is a repeated field that contains the conversation history and the
  // latest request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of `Tools` the `Model` may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the `Model`. Supported `Tool`s are `Function` and
  // `code_execution`. Refer to the [Function
  // calling](https://ai.google.dev/gemini-api/docs/function-calling) and the
  // [Code execution](https://ai.google.dev/gemini-api/docs/code-execution)
  // guides to learn more.
  repeated Tool tools = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Tool configuration for any `Tool` specified in the request. Refer
  // to the [Function calling
  // guide](https://ai.google.dev/gemini-api/docs/function-calling#function_calling_mode)
  // for a usage example.
  ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in
  // `safety_settings`. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT, HARM_CATEGORY_CIVIC_INTEGRITY are supported.
  // Refer to the [guide](https://ai.google.dev/gemini-api/docs/safety-settings)
  // for detailed information on available safety settings. Also refer to the
  // [Safety guidance](https://ai.google.dev/gemini-api/docs/safety-guidance) to
  // learn how to incorporate safety considerations in your AI applications.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The name of the content
  // [cached](https://ai.google.dev/gemini-api/docs/caching) to use as context
  // to serve the prediction.
  //
  // Format: `cachedContents/{cachedContent}`.
  optional string cached_content = 9 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/CachedContent"
    }
  ];
}

// The configuration for a prebuilt (preset) voice, selected by name.
message PrebuiltVoiceConfig {
  // The name of the preset voice to use.
  optional string voice_name = 1;
}

// The configuration for the voice to use.
message VoiceConfig {
  // The configuration for the speaker to use. At most one member of this
  // oneof can be set; a prebuilt voice is the only member declared here.
  oneof voice_config {
    // The configuration for the prebuilt voice to use.
    PrebuiltVoiceConfig prebuilt_voice_config = 1;
  }
}

// The speech generation config: the voice to use and the language of the
// synthesized speech.
message SpeechConfig {
  // The configuration in case of single-voice output.
  VoiceConfig voice_config = 1;

  // Optional. Language code (in BCP 47 format, e.g. "en-US") for speech
  // synthesis.
  //
  // Valid values are: de-DE, en-AU, en-GB, en-IN, en-US, es-US, fr-FR, hi-IN,
  // pt-BR, ar-XA, es-ES, fr-CA, id-ID, it-IT, ja-JP, tr-TR, vi-VN, bn-IN,
  // gu-IN, kn-IN, ml-IN, mr-IN, ta-IN, te-IN, nl-NL, ko-KR, cmn-CN, pl-PL,
  // ru-RU, and th-TH.
  string language_code = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Config for thinking features.
message ThinkingConfig {
  // Indicates whether to include thoughts in the response.
  // If true, thoughts are returned only when available.
  optional bool include_thoughts = 1;

  // The number of thought tokens that the model should generate.
  optional int32 thinking_budget = 2;
}

// Configuration options for model generation and outputs. Not all parameters
// are configurable for every model.
message GenerationConfig {
  // Supported modalities of the response.
  enum Modality {
    // Default value.
    MODALITY_UNSPECIFIED = 0;

    // Indicates the model should return text.
    TEXT = 1;

    // Indicates the model should return images.
    IMAGE = 2;

    // Indicates the model should return audio.
    AUDIO = 3;
  }

  // Media resolution for the input media.
  enum MediaResolution {
    // Media resolution has not been set.
    MEDIA_RESOLUTION_UNSPECIFIED = 0;

    // Media resolution set to low (64 tokens).
    MEDIA_RESOLUTION_LOW = 1;

    // Media resolution set to medium (256 tokens).
    MEDIA_RESOLUTION_MEDIUM = 2;

    // Media resolution set to high (zoomed reframing with 256 tokens).
    MEDIA_RESOLUTION_HIGH = 3;
  }

  // Optional. Number of generated responses to return. If unset, this will
  // default to 1. Please note that this doesn't work for previous generation
  // models (Gemini 1.0 family).
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // `stop_sequence`. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a response candidate.
  //
  // Note: The default value varies by model, see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 2.0].
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and Top-p (nucleus) sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits the
  // number of tokens based on the cumulative probability.
  //
  // Note: The default value varies by `Model` and is specified by
  // the `Model.top_p` attribute returned from the `getModel` function. An
  // empty `top_k` attribute indicates that the model doesn't apply top-k
  // sampling and doesn't allow setting `top_k` on requests.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // Gemini models use Top-p (nucleus) sampling or a combination of Top-k and
  // nucleus sampling. Top-k sampling considers the set of `top_k` most probable
  // tokens. Models running with nucleus sampling don't allow top_k setting.
  //
  // Note: The default value varies by `Model` and is specified by
  // the `Model.top_k` attribute returned from the `getModel` function. An
  // empty `top_k` attribute indicates that the model doesn't apply top-k
  // sampling and doesn't allow setting `top_k` on requests.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Seed used in decoding. If not set, the request uses a randomly
  // generated seed.
  optional int32 seed = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. MIME type of the generated candidate text.
  // Supported MIME types are:
  // `text/plain`: (default) Text output.
  // `application/json`: JSON response in the response candidates.
  // `text/x.enum`: ENUM as a string response in the response candidates.
  // Refer to the
  // [docs](https://ai.google.dev/gemini-api/docs/prompting_with_media#plain_text_formats)
  // for a list of all supported text MIME types.
  string response_mime_type = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Output schema of the generated candidate text. Schemas must be a
  // subset of the [OpenAPI schema](https://spec.openapis.org/oas/v3.0.3#schema)
  // and can be objects, primitives or arrays.
  //
  // If set, a compatible `response_mime_type` must also be set.
  // Compatible MIME types:
  // `application/json`: Schema for JSON response.
  // Refer to the [JSON text generation
  // guide](https://ai.google.dev/gemini-api/docs/json-mode) for more details.
  Schema response_schema = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Presence penalty applied to the next token's logprobs if the
  // token has already been seen in the response.
  //
  // This penalty is binary on/off and not dependent on the number of times the
  // token is used (after the first). Use
  // [frequency_penalty][google.ai.generativelanguage.v1beta.GenerationConfig.frequency_penalty]
  // for a penalty that increases with each use.
  //
  // A positive penalty will discourage the use of tokens that have already
  // been used in the response, increasing the vocabulary.
  //
  // A negative penalty will encourage the use of tokens that have already been
  // used in the response, decreasing the vocabulary.
  optional float presence_penalty = 15 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Frequency penalty applied to the next token's logprobs,
  // multiplied by the number of times each token has been seen in the response
  // so far.
  //
  // A positive penalty will discourage the use of tokens that have already
  // been used, proportional to the number of times the token has been used:
  // The more a token is used, the more difficult it is for the model to use
  // that token again, increasing the vocabulary of responses.
  //
  // Caution: A _negative_ penalty will encourage the model to reuse tokens
  // proportional to the number of times the token has been used. Small
  // negative values will reduce the vocabulary of a response. Larger negative
  // values will cause the model to start repeating a common token until it
  // hits the
  // [max_output_tokens][google.ai.generativelanguage.v1beta.GenerationConfig.max_output_tokens]
  // limit.
  optional float frequency_penalty = 16
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, export the logprobs results in response.
  optional bool response_logprobs = 17 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Only valid if
  // [response_logprobs=True][google.ai.generativelanguage.v1beta.GenerationConfig.response_logprobs].
  // This sets the number of top logprobs to return at each decoding step in the
  // [Candidate.logprobs_result][google.ai.generativelanguage.v1beta.Candidate.logprobs_result].
  optional int32 logprobs = 18 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Enables enhanced civic answers. It may not be available for all
  // models.
  optional bool enable_enhanced_civic_answers = 19
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The requested modalities of the response. Represents the set of
  // modalities that the model can return, and should be expected in the
  // response. This is an exact match to the modalities of the response.
  //
  // A model may have multiple combinations of supported modalities. If the
  // requested modalities do not match any of the supported combinations, an
  // error will be returned.
  //
  // An empty list is equivalent to requesting only text.
  repeated Modality response_modalities = 20
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The speech generation config.
  optional SpeechConfig speech_config = 21
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Config for thinking features.
  // An error will be returned if this field is set for models that don't
  // support thinking.
  optional ThinkingConfig thinking_config = 22
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If specified, the media resolution specified will be used.
  optional MediaResolution media_resolution = 23
      [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for retrieving grounding content from a `Corpus` or
// `Document` created using the Semantic Retriever API.
message SemanticRetrieverConfig {
  // Required. Name of the resource (a `Corpus` or a `Document`) for
  // retrieval. Example: `corpora/123` or `corpora/123/documents/abc`.
  string source = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Query to use for matching `Chunk`s in the given resource by
  // similarity.
  Content query = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Filters for selecting `Document`s and/or `Chunk`s from the
  // resource.
  repeated MetadataFilter metadata_filters = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum number of relevant `Chunk`s to retrieve.
  optional int32 max_chunks_count = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Minimum relevance score for retrieved relevant `Chunk`s.
  optional float minimum_relevance_score = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model supporting multiple candidate responses.
//
// Safety ratings and content filtering are reported for both
// prompt in `GenerateContentResponse.prompt_feedback` and for each candidate
// in `finish_reason` and in `safety_ratings`. The API:
//  - Returns either all requested candidates or none of them
//  - Returns no candidates at all only if there was something wrong with the
//    prompt (check `prompt_feedback`)
//  - Reports feedback on each candidate in `finish_reason` and
//    `safety_ratings`.
message GenerateContentResponse {
  // A set of the feedback metadata for the prompt specified in
  // `GenerateContentRequest.contents`.
  message PromptFeedback {
    // Specifies the reason why the prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. Inspect `safety_ratings`
      // to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;

      // Prompt was blocked due to the terms which are included from the
      // terminology blocklist.
      BLOCKLIST = 3;

      // Prompt was blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;

      // Candidates blocked due to unsafe image generation content.
      IMAGE_SAFETY = 5;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase the prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Metadata on the generation request's token usage.
  message UsageMetadata {
    // Number of tokens in the prompt. When `cached_content` is set, this is
    // still the total effective prompt size meaning this includes the number of
    // tokens in the cached content.
    int32 prompt_token_count = 1;

    // Number of tokens in the cached part of the prompt (the cached content).
    int32 cached_content_token_count = 4;

    // Total number of tokens across all the generated response candidates.
    int32 candidates_token_count = 2;

    // Output only. Number of tokens present in tool-use prompt(s).
    int32 tool_use_prompt_token_count = 8
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Number of tokens of thoughts for thinking models.
    int32 thoughts_token_count = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Total token count for the generation request (prompt + response
    // candidates).
    int32 total_token_count = 3;

    // Output only. List of modalities that were processed in the request input.
    repeated ModalityTokenCount prompt_tokens_details = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. List of modalities of the cached content in the request
    // input.
    repeated ModalityTokenCount cache_tokens_details = 6
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. List of modalities that were returned in the response.
    repeated ModalityTokenCount candidates_tokens_details = 7
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. List of modalities that were processed for tool-use request
    // inputs.
    repeated ModalityTokenCount tool_use_prompt_tokens_details = 9
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Candidate responses from the model.
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;

  // Output only. Metadata on the generation requests' token usage.
  UsageMetadata usage_metadata = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The model version used to generate the response.
  string model_version = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A response candidate generated from the model.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The response candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The response candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // The response candidate content was flagged for using an unsupported
    // language.
    // (Declared before `OTHER`; the numbers 6 and 5 are intentionally not in
    // declaration order and must not be renumbered.)
    LANGUAGE = 6;

    // Unknown reason.
    OTHER = 5;

    // Token generation stopped because the content contains forbidden terms.
    BLOCKLIST = 7;

    // Token generation stopped for potentially containing prohibited content.
    PROHIBITED_CONTENT = 8;

    // Token generation stopped because the content potentially contains
    // Sensitive Personally Identifiable Information (SPII).
    SPII = 9;

    // The function call generated by the model is invalid.
    MALFORMED_FUNCTION_CALL = 10;

    // Token generation stopped because generated images contain safety
    // violations.
    IMAGE_SAFETY = 11;
  }

  // Output only. Index of the candidate in the list of response candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating tokens.
  //
  // If empty, the model has not stopped generating tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Attribution information for sources that contributed to a
  // grounded answer.
  //
  // This field is populated for `GenerateAnswer` calls.
  repeated GroundingAttribution grounding_attributions = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Grounding metadata for the candidate.
  //
  // This field is populated for `GenerateContent` calls.
  GroundingMetadata grounding_metadata = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Average log probability score of the candidate.
  double avg_logprobs = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Log-likelihood scores for the response tokens and top tokens.
  LogprobsResult logprobs_result = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Log probability results: for each decoding step, the top candidates and
// the chosen candidate.
message LogprobsResult {
  // Candidate for the logprobs token and score.
  message Candidate {
    // The candidate's token string value.
    optional string token = 1;

    // The candidate's token id value.
    optional int32 token_id = 3;

    // The candidate's log probability.
    optional float log_probability = 2;
  }

  // Candidates with top log probabilities at each decoding step.
  message TopCandidates {
    // Sorted by log probability in descending order.
    repeated Candidate candidates = 1;
  }

  // Length = total number of decoding steps.
  repeated TopCandidates top_candidates = 1;

  // Length = total number of decoding steps.
  // The chosen candidates may or may not be in `top_candidates`.
  repeated Candidate chosen_candidates = 2;
}

// Identifier for the source contributing to this attribution.
message AttributionSourceId {
  // Identifier for a part within a `GroundingPassage`.
  message GroundingPassageId {
    // Output only. ID of the passage matching the `GenerateAnswerRequest`'s
    // `GroundingPassage.id`.
    string passage_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Index of the part within the `GenerateAnswerRequest`'s
    // `GroundingPassage.content`.
    int32 part_index = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Identifier for a `Chunk` retrieved via Semantic Retriever specified in the
  // `GenerateAnswerRequest` using `SemanticRetrieverConfig`.
  message SemanticRetrieverChunk {
    // Output only. Name of the source matching the request's
    // `SemanticRetrieverConfig.source`. Example: `corpora/123` or
    // `corpora/123/documents/abc`.
    string source = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Name of the `Chunk` containing the attributed text.
    // Example: `corpora/123/documents/abc/chunks/xyz`.
    string chunk = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The kind of source being identified: either an inline passage or a
  // `Chunk` fetched via Semantic Retriever. At most one member is set.
  oneof source {
    // Identifier for an inline passage.
    GroundingPassageId grounding_passage = 1;

    // Identifier for a `Chunk` fetched via Semantic Retriever.
    SemanticRetrieverChunk semantic_retriever_chunk = 2;
  }
}

// Attribution for a source that contributed to an answer.
message GroundingAttribution {
  // Output only. Identifier for the source contributing to this attribution.
  AttributionSourceId source_id = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // The grounding source content that makes up this attribution.
  Content content = 2;
}

// Metadata related to retrieval in the grounding flow.
message RetrievalMetadata {
  // Optional. Score indicating how likely information from Google Search could
  // help answer the prompt. The score is in the range [0, 1], where 0 is the
  // least likely and 1 is the most likely. This score is only populated when
  // Google Search grounding and dynamic retrieval are enabled. It will be
  // compared to the threshold to determine whether to trigger Google Search.
  float google_search_dynamic_retrieval_score = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// Metadata returned to client when grounding is enabled.
message GroundingMetadata {
  // Optional. Google Search entry point for follow-up web searches.
  optional SearchEntryPoint search_entry_point = 1
      [(google.api.field_behavior) = OPTIONAL];

  // List of supporting references retrieved from specified grounding source.
  repeated GroundingChunk grounding_chunks = 2;

  // List of grounding support.
  repeated GroundingSupport grounding_supports = 3;

  // Metadata related to retrieval in the grounding flow.
  optional RetrievalMetadata retrieval_metadata = 4;

  // Web search queries for follow-up web searches.
  repeated string web_search_queries = 5;
}

// Google Search entry point.
message SearchEntryPoint {
  // Optional. Web content snippet that can be embedded in a web page or an app
  // webview.
  string rendered_content = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Base64-encoded JSON representing an array of <search term,
  // search url> tuples.
  bytes sdk_blob = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Grounding chunk: a single supporting reference, as listed in
// `GroundingMetadata.grounding_chunks`.
message GroundingChunk {
  // Chunk from the web.
  message Web {
    // URI reference of the chunk.
    optional string uri = 1;

    // Title of the chunk.
    optional string title = 2;
  }

  // Chunk type. `web` is the only chunk type declared in this oneof.
  oneof chunk_type {
    // Grounding chunk from the web.
    Web web = 1;
  }
}

// Segment of the content, identified by the index of a Part and the
// half-open byte range [`start_index`, `end_index`) within that Part.
message Segment {
  // Output only. The index of a Part object within its parent Content object.
  int32 part_index = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Start index in the given Part, measured in bytes. Offset from
  // the start of the Part, inclusive, starting at zero.
  int32 start_index = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. End index in the given Part, measured in bytes. Offset from
  // the start of the Part, exclusive, starting at zero.
  int32 end_index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The text corresponding to the segment from the response.
  string text = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Grounding support: ties a segment of the content to the grounding chunks
// that support it.
message GroundingSupport {
  // Segment of the content this support belongs to.
  optional Segment segment = 1;

  // A list of indices (into `GroundingMetadata.grounding_chunks`) specifying
  // the citations associated with the claim. For instance [1,3,4] means
  // that grounding_chunks[1], grounding_chunks[3], and
  // grounding_chunks[4] are the retrieved content attributed to the claim.
  repeated int32 grounding_chunk_indices = 2;

  // Confidence scores of the support references. Each ranges from 0 to 1,
  // where 1 is the most confident. This list must have the same size as
  // `grounding_chunk_indices`.
  repeated float confidence_scores = 3;
}

// Request to generate a grounded answer from the `Model`.
message GenerateAnswerRequest {
  // Style for grounded answers.
  enum AnswerStyle {
    // Unspecified answer style.
    ANSWER_STYLE_UNSPECIFIED = 0;

    // Succinct but abstract style.
    ABSTRACTIVE = 1;

    // Very brief and extractive style.
    EXTRACTIVE = 2;

    // Verbose style including extra details. The response may be formatted as a
    // sentence, paragraph, multiple paragraphs, or bullet points, etc.
    VERBOSE = 3;
  }

  // The sources in which to ground the answer.
  oneof grounding_source {
    // Passages provided inline with the request.
    GroundingPassages inline_passages = 6;

    // Content retrieved from resources created via the Semantic Retriever
    // API.
    SemanticRetrieverConfig semantic_retriever = 7;
  }

  // Required. The name of the `Model` to use for generating the grounded
  // response.
  //
  // Format: `models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the `Model`. For
  // single-turn queries, this is a single question to answer. For multi-turn
  // queries, this is a repeated field that contains conversation history and
  // the last `Content` in the list containing the question.
  //
  // Note: `GenerateAnswer` only supports queries in English.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Style in which answers should be returned.
  AnswerStyle answer_style = 5 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateAnswerRequest.contents` and
  // `GenerateAnswerResponse.answer`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  // Refer to the
  // [guide](https://ai.google.dev/gemini-api/docs/safety-settings)
  // for detailed information on available safety settings. Also refer to the
  // [Safety guidance](https://ai.google.dev/gemini-api/docs/safety-guidance) to
  // learn how to incorporate safety considerations in your AI applications.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Values can range from [0.0,1.0], inclusive. A value closer to 1.0 will
  // produce responses that are more varied and creative, while a value closer
  // to 0.0 will typically result in more straightforward responses from the
  // model. A low temperature (~0.2) is usually recommended for
  // Attributed-Question-Answering use cases.
  optional float temperature = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model for a grounded answer.
message GenerateAnswerResponse {
  // Feedback related to the input data used to answer the question, as opposed
  // to the model-generated response to the question.
  message InputFeedback {
    // Specifies the reason why the input was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Input was blocked due to safety reasons. Inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Input was blocked due to other reasons.
      OTHER = 2;
    }

    // Optional. If set, the input was blocked and no candidates are returned.
    // Rephrase the input.
    optional BlockReason block_reason = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the input.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate answer from the model.
  //
  // Note: The model *always* attempts to provide a grounded answer, even when
  // the answer is unlikely to be answerable from the given passages.
  // In that case, a low-quality or ungrounded answer may be provided, along
  // with a low `answerable_probability`.
  Candidate answer = 1;

  // Output only. The model's estimate of the probability that its answer is
  // correct and grounded in the input passages.
  //
  // A low `answerable_probability` indicates that the answer might not be
  // grounded in the sources.
  //
  // When `answerable_probability` is low, you may want to:
  //
  // * Display a message to the effect of "We couldn't answer that question" to
  // the user.
  // * Fall back to a general-purpose LLM that answers the question from world
  // knowledge. The threshold and nature of such fallbacks will depend on
  // individual use cases. `0.5` is a good starting threshold.
  optional float answerable_probability = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Feedback related to the input data used to answer the
  // question, as opposed to the model-generated response to the question.
  //
  // The input data can be one or more of the following:
  //
  // - Question specified by the last entry in `GenerateAnswerRequest.contents`
  // - Conversation history specified by the other entries in
  // `GenerateAnswerRequest.contents`
  // - Grounding sources (`GenerateAnswerRequest.semantic_retriever` or
  // `GenerateAnswerRequest.inline_passages`)
  optional InputFeedback input_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request containing the `Content` for the model to embed.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Task type for which the embeddings will be used. Not
  // supported on earlier models (`models/embedding-001`).
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A title for the text. Only applicable when `task_type` is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
  // embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Reduced dimension for the output embedding. If set,
  // excess values in the output embedding are truncated from the end.
  // Supported by newer models since 2024 only. You cannot set this value if
  // using the earlier model (`models/embedding-001`).
  optional int32 output_dimensionality = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
message ContentEmbedding {
  // The embedding values, one float per dimension of the embedding.
  repeated float values = 1;
}

// The response to an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content
  // (`EmbedContentRequest.content`).
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these requests
  // must match the model specified in `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}

// The response to a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as the
  // entries of `BatchEmbedContentsRequest.requests`.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Counts the number of tokens in the prompt sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `total_tokens` value.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. The input given to the model as a prompt. This field is ignored
  // when `generate_content_request` is set.
  repeated Content contents = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The overall input given to the `Model`. This includes the prompt
  // as well as other model steering information like [system
  // instructions](https://ai.google.dev/gemini-api/docs/system-instructions),
  // and/or function declarations for [function
  // calling](https://ai.google.dev/gemini-api/docs/function-calling).
  // `Model`s/`Content`s and `generate_content_request`s are mutually
  // exclusive. You can either send `Model` + `Content`s or a
  // `generate_content_request`, but never both.
  GenerateContentRequest generate_content_request = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// A response from `CountTokens`.
//
// It returns the model's `total_tokens` count for the prompt.
message CountTokensResponse {
  // The number of tokens that the `Model` tokenizes the `prompt` into. Always
  // non-negative.
  int32 total_tokens = 1;

  // Number of tokens in the cached part of the prompt (the cached content).
  int32 cached_content_token_count = 5;

  // Output only. List of modalities that were processed in the request input.
  repeated ModalityTokenCount prompt_tokens_details = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. List of modalities that were processed in the cached content.
  repeated ModalityTokenCount cache_tokens_details = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Configures the realtime input behavior in `BidiGenerateContent`.
message RealtimeInputConfig {
  // Configures automatic detection of activity.
  message AutomaticActivityDetection {
    // Determines how start of speech is detected.
    enum StartSensitivity {
      // The default is START_SENSITIVITY_HIGH.
      START_SENSITIVITY_UNSPECIFIED = 0;

      // Automatic detection will detect the start of speech more often.
      START_SENSITIVITY_HIGH = 1;

      // Automatic detection will detect the start of speech less often.
      START_SENSITIVITY_LOW = 2;
    }

    // Determines how end of speech is detected.
    enum EndSensitivity {
      // The default is END_SENSITIVITY_HIGH.
      END_SENSITIVITY_UNSPECIFIED = 0;

      // Automatic detection ends speech more often.
      END_SENSITIVITY_HIGH = 1;

      // Automatic detection ends speech less often.
      END_SENSITIVITY_LOW = 2;
    }

    // Optional. If automatic detection is enabled (the default), detected
    // voice and text input count as activity. If it is disabled by setting
    // this field to true, the client must send activity signals.
    optional bool disabled = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Determines how likely speech is to be detected.
    optional StartSensitivity start_of_speech_sensitivity = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The required duration of detected speech before start-of-speech
    // is committed. The lower this value, the more sensitive the
    // start-of-speech detection is and shorter speech can be recognized.
    // However, this also increases the probability of false positives.
    optional int32 prefix_padding_ms = 4
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Determines how likely detected speech is ended.
    optional EndSensitivity end_of_speech_sensitivity = 5
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The required duration of detected non-speech (e.g. silence)
    // before end-of-speech is committed. The larger this value, the longer
    // speech gaps can be without interrupting the user's activity but this will
    // increase the model's latency.
    optional int32 silence_duration_ms = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // The different ways of handling user activity.
  enum ActivityHandling {
    // If unspecified, the default behavior is `START_OF_ACTIVITY_INTERRUPTS`.
    ACTIVITY_HANDLING_UNSPECIFIED = 0;

    // Start of activity will interrupt the model's response (also called
    // "barge in"). The model's current response will be cut off at the
    // moment of the interruption. This is the default behavior.
    START_OF_ACTIVITY_INTERRUPTS = 1;

    // The model's response will not be interrupted.
    NO_INTERRUPTION = 2;
  }

  // Options about which input is included in the user's turn.
  enum TurnCoverage {
    // If unspecified, the default behavior is `TURN_INCLUDES_ONLY_ACTIVITY`.
    TURN_COVERAGE_UNSPECIFIED = 0;

    // The user's turn only includes activity since the last turn, excluding
    // inactivity (e.g. silence on the audio stream). This is the default
    // behavior.
    TURN_INCLUDES_ONLY_ACTIVITY = 1;

    // The user's turn includes all realtime input since the last turn,
    // including inactivity (e.g. silence on the audio stream).
    TURN_INCLUDES_ALL_INPUT = 2;
  }

  // Optional. If not set, automatic activity detection is enabled by default.
  // If automatic voice detection is disabled, the client must send activity
  // signals.
  AutomaticActivityDetection automatic_activity_detection = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Defines what effect activity has.
  optional ActivityHandling activity_handling = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Defines which input is included in the user's turn.
  optional TurnCoverage turn_coverage = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Session resumption configuration.
//
// This message is included in the session configuration as
// `BidiGenerateContentSetup.session_resumption`. If configured, the server
// will send `SessionResumptionUpdate` messages.
message SessionResumptionConfig {
  // The handle of a previous session. If not present then a new session is
  // created.
  //
  // Session handles come from `SessionResumptionUpdate.new_handle` values in
  // previous connections.
  optional string handle = 1;
}

// Enables context window compression — a mechanism for managing the model's
// context window so that it does not exceed a given length.
message ContextWindowCompressionConfig {
  // The SlidingWindow method operates by discarding content at the beginning of
  // the context window. The resulting context will always begin at the start of
  // a USER role turn. System instructions and any
  // `BidiGenerateContentSetup.prefix_turns` will always remain at the beginning
  // of the result.
  message SlidingWindow {
    // The target number of tokens to keep. The default value is
    // `trigger_tokens` / 2.
    //
    // Discarding parts of the context window causes a temporary latency
    // increase, so this value should be calibrated to avoid frequent
    // compression operations.
    optional int64 target_tokens = 1;
  }

  // The context window compression mechanism used. `sliding_window` is the
  // only mechanism declared in this oneof.
  oneof compression_mechanism {
    // A sliding-window mechanism.
    SlidingWindow sliding_window = 2;
  }

  // The number of tokens (before running a turn) required to trigger a context
  // window compression.
  //
  // This can be used to balance quality against latency as shorter context
  // windows may result in faster model responses. However, any compression
  // operation will cause a temporary latency increase, so they should not be
  // triggered frequently.
  //
  // If not set, the default is 80% of the model's context window limit. This
  // leaves 20% for the next user request/model response.
  optional int64 trigger_tokens = 1;
}

// The audio transcription configuration. This message declares no fields;
// its presence (for example in
// `BidiGenerateContentSetup.output_audio_transcription`) enables
// transcription.
message AudioTranscriptionConfig {}

// Message to be sent in the first (and only in the first)
// `BidiGenerateContentClientMessage`. Contains configuration that will apply
// for the duration of the streaming RPC.
//
// Clients should wait for a `BidiGenerateContentSetupComplete` message before
// sending any additional messages.
message BidiGenerateContentSetup {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // Format: `models/{model}`
  string model = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Generation config.
  //
  // The following fields are not supported:
  //
  //  - `response_logprobs`
  //  - `response_mime_type`
  //  - `logprobs`
  //  - `response_schema`
  //  - `stop_sequence`
  //  - `routing_config`
  //  - `audio_timestamp`
  GenerationConfig generation_config = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The user-provided system instructions for the model.
  //
  // Note: Only text should be used in parts, and the content in each part
  // will be in a separate paragraph.
  Content system_instruction = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // the knowledge and scope of the model.
  repeated Tool tools = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configures the handling of realtime input.
  RealtimeInputConfig realtime_input_config = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configures the session resumption mechanism.
  //
  // If included, the server will send `SessionResumptionUpdate` messages.
  SessionResumptionConfig session_resumption = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configures a context window compression mechanism.
  //
  // If included, the server will automatically reduce the size of the context
  // when it exceeds the configured length.
  ContextWindowCompressionConfig context_window_compression = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. If set, enables transcription of the model's audio output. The
  // transcription aligns with the language code specified for the output
  // audio, if configured.
  AudioTranscriptionConfig output_audio_transcription = 11
      [(google.api.field_behavior) = OPTIONAL];
}

// Incremental update of the current conversation delivered from the client.
// All of the content here is unconditionally appended to the conversation
// history and used as part of the prompt to the model to generate content.
//
// A message here will interrupt any current model generation.
message BidiGenerateContentClientContent {
  // Optional. The content appended to the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn
  // queries, this is a repeated field that contains conversation history and
  // the latest request.
  repeated Content turns = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, indicates that the server content generation should
  // start with the currently accumulated prompt. Otherwise (false or unset),
  // the server awaits additional messages before starting generation.
  bool turn_complete = 2 [(google.api.field_behavior) = OPTIONAL];
}

// User input that is sent in real time.
//
// The different modalities (audio, video and text) are handled as concurrent
// streams. The ordering across these streams is not guaranteed.
//
// This is different from
// [BidiGenerateContentClientContent][google.ai.generativelanguage.v1beta.BidiGenerateContentClientContent]
// in a few ways:
//
// * Can be sent continuously without interruption to model generation.
// * If there is a need to mix data interleaved across the
//   [BidiGenerateContentClientContent][google.ai.generativelanguage.v1beta.BidiGenerateContentClientContent]
//   and the
//   [BidiGenerateContentRealtimeInput][google.ai.generativelanguage.v1beta.BidiGenerateContentRealtimeInput],
//   the server attempts to optimize for best response, but there are no
//   guarantees.
// * End of turn is not explicitly specified, but is rather derived from user
//   activity (for example, end of speech).
// * Even before the end of turn, the data is processed incrementally
//   to optimize for a fast start of the response from the model.
message BidiGenerateContentRealtimeInput {
  // Marks the start of user activity.
  message ActivityStart {}

  // Marks the end of user activity.
  message ActivityEnd {}

  // Optional. Inlined bytes data for media input. Multiple `media_chunks` are
  // not supported, all but the first will be ignored.
  //
  // DEPRECATED: Use one of `audio`, `video`, or `text` instead.
  //
  // The `deprecated` option mirrors the notice above so that generated code
  // and tooling surface the deprecation; it does not change the wire format.
  repeated Blob media_chunks = 1
      [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Optional. These form the realtime audio input stream.
  Blob audio = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates that the audio stream has ended, e.g. because the
  // microphone was turned off.
  //
  // This should only be sent when automatic activity detection is enabled
  // (which is the default).
  //
  // The client can reopen the stream by sending an audio message.
  optional bool audio_stream_end = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. These form the realtime video input stream.
  Blob video = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. These form the realtime text input stream.
  optional string text = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Marks the start of user activity. This can only be sent if
  // automatic (i.e. server-side) activity detection is disabled.
  ActivityStart activity_start = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Marks the end of user activity. This can only be sent if
  // automatic (i.e. server-side) activity detection is disabled.
  ActivityEnd activity_end = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Client-generated response to a `BidiGenerateContentToolCall` received from
// the server. Individual `FunctionResponse` objects are matched to the
// respective `FunctionCall` objects by the `id` field.
//
// Note that in the unary and server-streaming GenerateContent APIs function
// calling happens by exchanging the `Content` parts, while in the bidi
// GenerateContent APIs function calling happens over this dedicated set of
// messages.
message BidiGenerateContentToolResponse {
  // Optional. The responses to the function calls.
  repeated FunctionResponse function_responses = 1
      [(google.api.field_behavior) = OPTIONAL];
}

// Messages sent by the client in the BidiGenerateContent call.
message BidiGenerateContentClientMessage {
  // The type of the message. Exactly one of the fields below is carried by
  // each client message.
  oneof message_type {
    // Optional. Session configuration, sent only in the first client
    // message.
    BidiGenerateContentSetup setup = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Incremental update of the current conversation delivered from
    // the client.
    BidiGenerateContentClientContent client_content = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. User input that is sent in real time.
    BidiGenerateContentRealtimeInput realtime_input = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Response to a `BidiGenerateContentToolCall` received from the
    // server.
    BidiGenerateContentToolResponse tool_response = 4
        [(google.api.field_behavior) = OPTIONAL];
  }
}

// Sent in response to a `BidiGenerateContentSetup` message from the client.
// This message declares no fields; its arrival is the signal.
message BidiGenerateContentSetupComplete {}

// Incremental server update generated by the model in response to client
// messages.
//
// Content is generated as quickly as possible, and not in real time. Clients
// may choose to buffer and play it out in real time.
message BidiGenerateContentServerContent {
  // Output only. The content that the model has generated as part of the
  // current conversation with the user.
  optional Content model_turn = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. If true, indicates that the model is done generating.
  //
  // When the model is interrupted while generating, there will be no
  // `generation_complete` message in the interrupted turn; it will go through
  // `interrupted` > `turn_complete`.
  //
  // When the model assumes realtime playback, there will be a delay between
  // `generation_complete` and `turn_complete` that is caused by the model
  // waiting for playback to finish.
  bool generation_complete = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. If true, indicates that the model has completed its turn.
  // Generation will only start in response to additional client messages.
  bool turn_complete = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. If true, indicates that a client message has interrupted
  // current model generation. If the client is playing out the content in real
  // time, this is a good signal to stop and empty the current playback queue.
  bool interrupted = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Grounding metadata for the generated content.
  GroundingMetadata grounding_metadata = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Output audio transcription. The transcription is sent
  // independently of the other server messages and there is no guaranteed
  // ordering, in particular not between `server_content` and this
  // `output_transcription`.
  BidiGenerateContentTranscription output_transcription = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request for the client to execute the `function_calls` and return the
// responses with the matching `id`s.
message BidiGenerateContentToolCall {
  // Output only. The function calls to be executed.
  repeated FunctionCall function_calls = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Notification for the client that a previously issued
// `BidiGenerateContentToolCall` with the specified `id`s should not have been
// executed and should be cancelled. If there were side effects to those tool
// calls, clients may attempt to undo the tool calls. This message occurs only
// in cases where the clients interrupt server turns.
message BidiGenerateContentToolCallCancellation {
  // Output only. The ids of the tool calls to be cancelled.
  repeated string ids = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A notice that the server will soon disconnect.
message GoAway {
  // The remaining time before the connection will be terminated as ABORTED.
  //
  // This duration will never be less than a model-specific minimum, which will
  // be specified together with the rate limits for the model.
  google.protobuf.Duration time_left = 1;
}

// Update of the session resumption state.
//
// Only sent if `BidiGenerateContentSetup.session_resumption` was set.
message SessionResumptionUpdate {
  // New handle that represents a state that can be resumed. Empty if
  // `resumable` is false.
  string new_handle = 1;

  // True if the current session can be resumed at this point.
  //
  // Resumption is not possible at some points in the session. For example, when
  // the model is executing function calls or generating. Resuming the session
  // (using a previous session handle) in such a state will result in some data
  // loss. In these cases, `new_handle` will be empty and `resumable` will be
  // false.
  bool resumable = 2;
}

// Transcription of audio (input or output). Used, for example, as the type of
// `BidiGenerateContentServerContent.output_transcription`.
message BidiGenerateContentTranscription {
  // Transcription text.
  string text = 1;
}

// Response message for the BidiGenerateContent call.
message BidiGenerateContentServerMessage {
  // The type of the message.
  oneof message_type {
    // Output only. Sent in response to a `BidiGenerateContentSetup` message
    // from the client when setup is complete.
    BidiGenerateContentSetupComplete setup_complete = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Content generated by the model in response to client
    // messages.
    BidiGenerateContentServerContent server_content = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Request for the client to execute the `function_calls` and
    // return the responses with the matching `id`s.
    BidiGenerateContentToolCall tool_call = 4
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Notification for the client that a previously issued
    // `BidiGenerateContentToolCall` with the specified `id`s should be
    // cancelled.
    BidiGenerateContentToolCallCancellation tool_call_cancellation = 5
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A notice that the server will soon disconnect.
    GoAway go_away = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Update of the session resumption state.
    SessionResumptionUpdate session_resumption_update = 7
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. Usage metadata about the response(s). Declared outside the
  // `message_type` oneof, so it can accompany any of the message types above.
  UsageMetadata usage_metadata = 10 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Usage metadata about response(s).
message UsageMetadata {
  // Output only. Number of tokens in the prompt. When `cached_content` is set,
  // this is still the total effective prompt size, meaning this includes the
  // number of tokens in the cached content.
  int32 prompt_token_count = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Number of tokens in the cached part of the prompt (the cached content).
  int32 cached_content_token_count = 4;

  // Output only. Total number of tokens across all the generated response
  // candidates.
  int32 response_token_count = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Number of tokens present in tool-use prompt(s).
  int32 tool_use_prompt_token_count = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Number of tokens of thoughts for thinking models.
  int32 thoughts_token_count = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Total token count for the generation request (prompt +
  // response candidates).
  int32 total_token_count = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. List of modalities that were processed in the request input.
  repeated ModalityTokenCount prompt_tokens_details = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. List of modalities of the cached content in the request input.
  repeated ModalityTokenCount cache_tokens_details = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. List of modalities that were returned in the response.
  repeated ModalityTokenCount response_tokens_details = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. List of modalities that were processed for tool-use request
  // inputs.
  repeated ModalityTokenCount tool_use_prompt_tokens_details = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];
}