// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// This file is written in proto3 syntax.
syntax = "proto3";

package google.cloud.contentwarehouse.v1;

// Imports supply the field_behavior / resource annotations and the
// Document AI, Timestamp, and DateTime message types referenced below.
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1/document.proto";
import "google/protobuf/timestamp.proto";
import "google/type/datetime.proto";

// Per-language code generation settings (C#, Go, Java, PHP, Ruby).
option csharp_namespace = "Google.Cloud.ContentWarehouse.V1";
option go_package = "cloud.google.com/go/contentwarehouse/apiv1/contentwarehousepb;contentwarehousepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.contentwarehouse.v1";
option php_namespace = "Google\\Cloud\\ContentWarehouse\\V1";
option ruby_package = "Google::Cloud::ContentWarehouse::V1";

// Defines the structure of a Content Warehouse document: its identity, its
// content (structured and/or raw), user-supplied properties, and metadata
// such as creation and update times.
message Document {
  // A document can be addressed either by its document ID or by its
  // customer-supplied reference ID.
  option (google.api.resource) = {
    type: "contentwarehouse.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/documents/referenceId/{reference_id}"
  };

  // The resource name of the document.
  // Format:
  // projects/{project_number}/locations/{location}/documents/{document_id}.
  //
  // The name is ignored when creating a document.
  string name = 1;

  // The reference ID set by customers. Must be unique per project and location.
  string reference_id = 11;

  // Required. Display name of the document given by the user. This name will be
  // displayed in the UI. Customers can populate this field with the name of the
  // document. This differs from the 'title' field, as 'title' is optional and
  // stores the top heading in the document.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Title that describes the document.
  // This can be the top heading or text that describes the document.
  string title = 18;

  // URI to display the document, for example, in the UI.
  string display_uri = 17;

  // The Document schema name.
  // Format:
  // projects/{project_number}/locations/{location}/documentSchemas/{document_schema_id}.
  string document_schema_name = 3 [(google.api.resource_reference) = {
    type: "contentwarehouse.googleapis.com/DocumentSchema"
  }];

  // Structured content of the document. At most one of the following fields
  // can be set.
  oneof structured_content {
    // Other document format, such as PPTX, XLSX.
    string plain_text = 15;

    // Document AI format to save the structured content, including OCR.
    google.cloud.documentai.v1.Document cloud_ai_document = 4;
  }

  // A path linked to the structured content file.
  // This field is deprecated.
  string structured_content_uri = 16 [deprecated = true];

  // Raw document file. At most one of the following fields can be set.
  oneof raw_document {
    // Raw document file in Cloud Storage path.
    string raw_document_path = 5;

    // Raw document content.
    bytes inline_raw_document = 6;
  }

  // List of values that are user-supplied metadata.
  repeated Property properties = 7;

  // Output only. The time when the document was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the document was created.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // This is used when DocAI was not used to load the document and parsing/
  // extracting is needed for the inline_raw_document. For example, if
  // inline_raw_document is the byte representation of a PDF file, then
  // this should be set to: RAW_DOCUMENT_FILE_TYPE_PDF.
  RawDocumentFileType raw_document_file_type = 10;

  // If true, makes the document visible to asynchronous policies and rules.
  // This field is deprecated.
  bool async_enabled = 12 [deprecated = true];

  // Indicates the category (image, audio, video etc.) of the original content.
  ContentCategory content_category = 20;

  // If true, text extraction will not be performed.
  // This field is deprecated; `text_extraction_enabled` is the non-deprecated
  // text extraction flag on this message.
  bool text_extraction_disabled = 19 [deprecated = true];

  // If true, text extraction will be performed.
  bool text_extraction_enabled = 21;

  // The user who created the document.
  string creator = 13;

  // The user who last updated the document.
  string updater = 14;

  // Output only. If linked to a Collection with RetentionPolicy, the date when
  // the document becomes mutable.
  google.protobuf.Timestamp disposition_time = 22
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Indicates if the document has a legal hold on it.
  bool legal_hold = 23 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A reference to a document, carrying a small amount of descriptive
// information about the referenced document.
message DocumentReference {
  // Required. Name of the referenced document.
  string document_name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "contentwarehouse.googleapis.com/Document"
    }
  ];

  // Display name of the referenced document; this name does not need to be
  // consistent with the display_name in the Document proto, depending on the
  // ACL constraint.
  string display_name = 2;

  // Stores a subset of the referenced document's content.
  // This is useful to allow the user to peek at the information of the
  // referenced document.
  string snippet = 3;

  // The document type of the document being referenced, expressed as whether
  // the document is a folder.
  bool document_is_folder = 4;

  // Output only. The time when the document was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the document was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the document was deleted.
  google.protobuf.Timestamp delete_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Document is a folder with retention policy.
  bool document_is_retention_folder = 8;

  // Document is a folder with legal hold.
  bool document_is_legal_hold_folder = 9;
}

// Property of a document: a named entry holding values of exactly one of the
// supported value types.
message Property {
  // Required. Must match the name of a PropertyDefinition in the
  // DocumentSchema.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Type of the property.
  // Must match the property_options type of the matching PropertyDefinition.
  // Value of the Property parsed into a specific data type.
  // Specific type value(s) obtained from Document AI's Property.mention_text
  // field.
  oneof values {
    // Integer property values.
    IntegerArray integer_values = 2;

    // Float property values.
    FloatArray float_values = 3;

    // String/text property values.
    TextArray text_values = 4;

    // Enum property values.
    EnumArray enum_values = 5;

    // Nested structured data property values.
    PropertyArray property_values = 6;

    // Date time property values.
    // It is not supported by CMEK compliant deployment.
    DateTimeArray date_time_values = 7;

    // Map property values.
    MapProperty map_property = 8;

    // Timestamp property values.
    // It is not supported by CMEK compliant deployment.
    TimestampArray timestamp_values = 9;
  }
}

// Integer values of a property.
message IntegerArray {
  // List of integer values (32-bit signed).
  repeated int32 values = 1;
}

// Float values of a property.
message FloatArray {
  // List of float values (single precision).
  repeated float values = 1;
}

// String/text values of a property.
message TextArray {
  // List of text values.
  repeated string values = 1;
}

// Enum values of a property. Each enum value is carried as a string.
message EnumArray {
  // List of enum values, each represented as a string.
  repeated string values = 1;
}

// DateTime values of a property.
message DateTimeArray {
  // List of datetime values, each a google.type.DateTime.
  // Both OffsetDateTime and ZonedDateTime are supported.
  repeated google.type.DateTime values = 1;
}

// Timestamp values of a property.
message TimestampArray {
  // List of timestamp values, each a TimestampValue.
  repeated TimestampValue values = 1;
}

// Timestamp value type. The timestamp is supplied either as a Timestamp
// message or as a text string.
message TimestampValue {
  // The representation of the timestamp. At most one of the following fields
  // can be set.
  oneof value {
    // Timestamp value.
    google.protobuf.Timestamp timestamp_value = 1;

    // The string must represent a valid instant in UTC and is parsed using
    // java.time.format.DateTimeFormatter.ISO_INSTANT.
    // For example, "2013-09-29T18:46:19Z".
    string text_value = 2;
  }
}

// Property values: a list of nested Property messages.
message PropertyArray {
  // List of property values.
  repeated Property properties = 1;
}

// Map property value.
// Represents structured entries of key-value pairs, consisting of field names
// which map to dynamically typed values.
message MapProperty {
  // Unordered map of field names to dynamically typed values.
  map<string, Value> fields = 1;
}

// `Value` represents a dynamically typed value which can be either
// a float, an integer, a string, an enum, a datetime, a timestamp, or a
// boolean value. A producer of value is expected to set one of these
// variants. Absence of any variant indicates an error.
message Value {
  // The kind of value.
  oneof kind {
    // Represents a float value.
    float float_value = 1;

    // Represents an integer value.
    int32 int_value = 2;

    // Represents a string value.
    string string_value = 3;

    // Represents an enum value.
    EnumValue enum_value = 4;

    // Represents a datetime value.
    google.type.DateTime datetime_value = 5;

    // Represents a timestamp value.
    TimestampValue timestamp_value = 6;

    // Represents a boolean value.
    bool boolean_value = 7;
  }
}

// Represents the string value of an enum field.
message EnumValue {
  // String value of the enum field. This must match the defined set of enums
  // in the document schema using EnumTypeOptions.
  string value = 1;
}

// When a raw document is supplied, this indicates the file format.
enum RawDocumentFileType {
  // No raw document specified or it is non-parsable.
  RAW_DOCUMENT_FILE_TYPE_UNSPECIFIED = 0;

  // Adobe PDF format.
  RAW_DOCUMENT_FILE_TYPE_PDF = 1;

  // Microsoft Word format.
  RAW_DOCUMENT_FILE_TYPE_DOCX = 2;

  // Microsoft Excel format.
  RAW_DOCUMENT_FILE_TYPE_XLSX = 3;

  // Microsoft PowerPoint format.
  RAW_DOCUMENT_FILE_TYPE_PPTX = 4;

  // UTF-8 encoded text format.
  RAW_DOCUMENT_FILE_TYPE_TEXT = 5;

  // TIFF or TIF image file format.
  RAW_DOCUMENT_FILE_TYPE_TIFF = 6;
}

// When a raw document or structured content is supplied, this stores the
// category of that content.
enum ContentCategory {
  // No category is specified.
  CONTENT_CATEGORY_UNSPECIFIED = 0;

  // Content is of image type.
  CONTENT_CATEGORY_IMAGE = 1;

  // Content is of audio type.
  CONTENT_CATEGORY_AUDIO = 2;

  // Content is of video type.
  CONTENT_CATEGORY_VIDEO = 3;
}
