// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1";
option go_package = "cloud.google.com/go/discoveryengine/apiv1/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "ChunkProto";
option java_package = "com.google.cloud.discoveryengine.v1";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1";

// Chunk captures all raw metadata information of items to be recommended or
// searched in the chunk mode.
//
// A chunk is addressed as a child of a document (see the resource patterns
// declared on this message). The type is self-referential: `chunk_metadata`
// can carry neighboring `Chunk` messages.
message Chunk {
  // Resource descriptor. Two name patterns are declared: one without and one
  // with the `collections/{collection}` segment.
  option (google.api.resource) = {
    type: "discoveryengine.googleapis.com/Chunk"
    pattern: "projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document}/chunks/{chunk}"
    pattern: "projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document}/chunks/{chunk}"
  };

  // Document metadata contains the information of the document that the
  // current chunk comes from.
  message DocumentMetadata {
    // URI of the document.
    string uri = 1;

    // Title of the document.
    string title = 2;

    // Data representation.
    // The structured JSON data for the document. It should conform to the
    // registered [Schema][google.cloud.discoveryengine.v1.Schema] or an
    // `INVALID_ARGUMENT` error is thrown.
    google.protobuf.Struct struct_data = 3;
  }

  // Page span of the chunk, expressed as a start page and an end page.
  message PageSpan {
    // The start page of the chunk.
    int32 page_start = 1;

    // The end page of the chunk.
    int32 page_end = 2;
  }

  // Metadata of the current chunk, holding the chunks that surround it.
  // Messages of this type are only populated on
  // [SearchService.Search][google.cloud.discoveryengine.v1.SearchService.Search]
  // API.
  message ChunkMetadata {
    // The previous chunks of the current chunk. The number is controlled by
    // [SearchRequest.ContentSearchSpec.ChunkSpec.num_previous_chunks][google.cloud.discoveryengine.v1.SearchRequest.ContentSearchSpec.ChunkSpec.num_previous_chunks].
    // This field is only populated on
    // [SearchService.Search][google.cloud.discoveryengine.v1.SearchService.Search]
    // API.
    repeated Chunk previous_chunks = 1;

    // The next chunks of the current chunk. The number is controlled by
    // [SearchRequest.ContentSearchSpec.ChunkSpec.num_next_chunks][google.cloud.discoveryengine.v1.SearchRequest.ContentSearchSpec.ChunkSpec.num_next_chunks].
    // This field is only populated on
    // [SearchService.Search][google.cloud.discoveryengine.v1.SearchService.Search]
    // API.
    repeated Chunk next_chunks = 2;
  }

  // The structured content information: a structure type paired with the
  // content of that structure.
  message StructuredContent {
    // Output only. The structure type of the structured content.
    StructureType structure_type = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The content of the structured content.
    string content = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // The annotation metadata includes structured content in the current chunk.
  message AnnotationMetadata {
    // Output only. The structured content information.
    StructuredContent structured_content = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Image ID, provided if the structured content is based on
    // an image.
    string image_id = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Defines the types of the structured content that can be extracted.
  enum StructureType {
    // Default value.
    STRUCTURE_TYPE_UNSPECIFIED = 0;

    // Shareholder structure.
    SHAREHOLDER_STRUCTURE = 1;

    // Signature structure.
    SIGNATURE_STRUCTURE = 2;

    // Checkbox structure.
    CHECKBOX_STRUCTURE = 3;
  }

  // The full resource name of the chunk.
  // Format:
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}/documents/{document_id}/chunks/{chunk_id}`.
  //
  // The resource definition of this message also declares a pattern without
  // the `collections/{collection}` segment:
  // `projects/{project}/locations/{location}/dataStores/{data_store}/branches/{branch}/documents/{document_id}/chunks/{chunk_id}`.
  //
  // This field must be a UTF-8 encoded string with a length limit of 1024
  // characters.
  string name = 1;

  // Unique chunk ID of the current chunk.
  string id = 2;

  // Content is a string from a document (parsed content).
  string content = 3;

  // Output only. Represents the relevance score based on similarity.
  // Higher score indicates higher chunk relevance.
  // The score is in range [-1.0, 1.0].
  // Only populated on
  // [SearchResponse][google.cloud.discoveryengine.v1.SearchResponse].
  //
  // Declared `optional`, so an unset score can be distinguished from a score
  // of `0.0`.
  optional double relevance_score = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Metadata of the document from the current chunk.
  DocumentMetadata document_metadata = 5;

  // Output only. Derived data that are not in the original input document.
  google.protobuf.Struct derived_struct_data = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Page span of the chunk.
  PageSpan page_span = 6;

  // Output only. Metadata of the current chunk, i.e. its previous and next
  // chunks.
  ChunkMetadata chunk_metadata = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Image Data URLs if the current chunk contains images.
  // Data URLs are composed of four parts: a prefix (`data:`), a MIME type
  // indicating the type of data, an optional `base64` token if non-textual,
  // and the data itself:
  // `data:[<mediatype>][;base64],<data>`
  repeated string data_urls = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Annotation contents if the current chunk contains annotations.
  repeated string annotation_contents = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The annotation metadata includes structured content in the
  // current chunk.
  repeated AnnotationMetadata annotation_metadata = 12
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
