// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.discoveryengine.v1beta;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/discoveryengine/v1beta/document.proto";
import "google/cloud/discoveryengine/v1beta/user_event.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/date.proto";

option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Beta";
option go_package = "cloud.google.com/go/discoveryengine/apiv1beta/discoveryenginepb;discoveryenginepb";
option java_multiple_files = true;
option java_outer_classname = "ImportConfigProto";
option java_package = "com.google.cloud.discoveryengine.v1beta";
option objc_class_prefix = "DISCOVERYENGINE";
option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1beta";
option ruby_package = "Google::Cloud::DiscoveryEngine::V1beta";

// Cloud Storage location for input content.
message GcsSource {
  // Required. Cloud Storage URIs to input files. Each URI can be up to
  // 2000 characters long. URIs can match the full object path (for example,
  // `gs://bucket/directory/object.json`) or a pattern matching one or more
  // files, such as `gs://bucket/directory/*.json`.
  //
  // A request can contain at most 100 files (or 100,000 files if `data_schema`
  // is `content`). Each file can be up to 2 GB (or 100 MB if `data_schema` is
  // `content`).
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];

  // The schema to use when parsing the data from the source.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One JSON
  //   [Document][google.cloud.discoveryengine.v1beta.Document] per line. Each
  //   document must have a valid
  //   [Document.id][google.cloud.discoveryengine.v1beta.Document.id].
  // * `content`: Unstructured data (e.g. PDF, HTML). Each file matched by
  //   `input_uris` becomes a document, with the ID set to the first 128
  //   bits of SHA256(URI) encoded as a hex string.
  // * `custom`: One custom data JSON per row in arbitrary format that conforms
  //   to the defined [Schema][google.cloud.discoveryengine.v1beta.Schema] of
  //   the data store. This can only be used by Gen App Builder.
  // * `csv`: A CSV file with header conforming to the defined
  //   [Schema][google.cloud.discoveryengine.v1beta.Schema] of the
  //   data store. Each entry after the header is imported as a Document.
  //   This can only be used by Gen App Builder.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One JSON
  //   [UserEvent][google.cloud.discoveryengine.v1beta.UserEvent] per line.
  string data_schema = 2;
}

// BigQuery source to import data from.
message BigQuerySource {
  // BigQuery table partition info. Leave this empty if the BigQuery table
  // is not partitioned.
  oneof partition {
    // BigQuery time-partitioned table's `_PARTITIONDATE` in YYYY-MM-DD format.
    google.type.Date partition_date = 5;
  }

  // The project that the BigQuery source is in, given as either a project
  // number or a project ID, with a length limit of 128 characters. If not
  // specified, inherits the project ID from the parent request.
  string project_id = 1;

  // Required. The BigQuery data set to copy the data from with a length limit
  // of 1,024 characters.
  string dataset_id = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The BigQuery table to copy the data from with a length limit of
  // 1,024 characters.
  string table_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Intermediate Cloud Storage directory used for the import with a length
  // limit of 2,000 characters. Can be specified if one wants to have the
  // BigQuery export to a specific Cloud Storage directory.
  string gcs_staging_dir = 4;

  // The schema to use when parsing the data from the source.
  //
  // Supported values for user event imports:
  //
  // * `user_event` (default): One
  //   [UserEvent][google.cloud.discoveryengine.v1beta.UserEvent] per row.
  //
  // Supported values for document imports:
  //
  // * `document` (default): One
  //   [Document][google.cloud.discoveryengine.v1beta.Document] format per
  //   row. Each document must have a valid
  //   [Document.id][google.cloud.discoveryengine.v1beta.Document.id] and one of
  //   [Document.json_data][google.cloud.discoveryengine.v1beta.Document.json_data]
  //   or
  //   [Document.struct_data][google.cloud.discoveryengine.v1beta.Document.struct_data].
  // * `custom`: One custom data per row in arbitrary format that conforms to
  //   the defined [Schema][google.cloud.discoveryengine.v1beta.Schema] of the
  //   data store. This can only be used by Gen App Builder.
  string data_schema = 6;
}

// Configuration of the destination for import-related errors.
message ImportErrorConfig {
  // Required. Errors destination.
  oneof destination {
    // Cloud Storage prefix for import errors. This must be an empty,
    // existing Cloud Storage directory. Import errors are written to
    // sharded files in this directory, one per line, as a JSON-encoded
    // `google.rpc.Status` message.
    string gcs_prefix = 1;
  }
}

// Request message for the ImportUserEvents method.
message ImportUserEventsRequest {
  // The inline source for the input config for the ImportUserEvents method.
  message InlineSource {
    // Required. A list of user events to import. Recommended max of 10k items.
    repeated UserEvent user_events = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Required. The desired input source of the user event data.
  oneof source {
    // The inline source for the input content for UserEvents.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    GcsSource gcs_source = 3;

    // BigQuery input source.
    BigQuerySource bigquery_source = 4;
  }

  // Required. Parent DataStore resource name, of the form
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/DataStore"
    }
  ];

  // The desired location of errors incurred during the import. Cannot be set
  // for inline user event imports.
  ImportErrorConfig error_config = 5;
}

// Response of the ImportUserEventsRequest. If the long-running operation was
// successful, this message is returned by the
// google.longrunning.Operation.response field.
message ImportUserEventsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors if this field was set in
  // the request.
  ImportErrorConfig error_config = 2;

  // Count of user events imported with complete existing Documents.
  int64 joined_events_count = 3;

  // Count of user events imported, but with Document information not found
  // in the existing Branch.
  int64 unjoined_events_count = 4;
}

// Metadata related to the progress of the ImportUserEvents operation. This is
// returned by the google.longrunning.Operation.metadata field.
message ImportUserEventsMetadata {
  // Operation create time.
  google.protobuf.Timestamp create_time = 1;

  // Operation last update time. If the operation is done, this is also the
  // finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;
}

// Metadata related to the progress of the ImportDocuments operation. This
// message is returned by the google.longrunning.Operation.metadata field.
message ImportDocumentsMetadata {
  // Time at which the operation was created.
  google.protobuf.Timestamp create_time = 1;

  // Time at which the operation was last updated. If the operation is done,
  // this is also the finish time.
  google.protobuf.Timestamp update_time = 2;

  // Count of entries that were processed successfully.
  int64 success_count = 3;

  // Count of entries that encountered errors while processing.
  int64 failure_count = 4;
}

// Request message for the ImportDocuments method.
message ImportDocumentsRequest {
  // The inline source for the input config for the ImportDocuments method.
  message InlineSource {
    // Required. A list of documents to update/create. Each document must have a
    // valid [Document.id][google.cloud.discoveryengine.v1beta.Document.id].
    // Recommended max of 100 items.
    repeated Document documents = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // Indicates how imported documents are reconciled with the existing documents
  // created or imported before.
  enum ReconciliationMode {
    // Defaults to `INCREMENTAL`.
    RECONCILIATION_MODE_UNSPECIFIED = 0;

    // Inserts new documents or updates existing documents.
    INCREMENTAL = 1;

    // Calculates diff and replaces the entire document dataset. Existing
    // documents may be deleted if they are not present in the source location.
    FULL = 2;
  }

  // Required. The source of the input.
  oneof source {
    // The inline source for the input content for documents.
    InlineSource inline_source = 2;

    // Cloud Storage location for the input content.
    GcsSource gcs_source = 3;

    // BigQuery input source.
    BigQuerySource bigquery_source = 4;
  }

  // Required. The parent branch resource name, such as
  // `projects/{project}/locations/{location}/collections/{collection}/dataStores/{data_store}/branches/{branch}`.
  // Requires create/update permission.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "discoveryengine.googleapis.com/Branch"
    }
  ];

  // The desired location of errors incurred during the import.
  ImportErrorConfig error_config = 5;

  // The mode of reconciliation between existing documents and the documents to
  // be imported. Defaults to
  // [ReconciliationMode.INCREMENTAL][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.ReconciliationMode.INCREMENTAL].
  ReconciliationMode reconciliation_mode = 6;

  // Whether to automatically generate IDs for the documents if absent.
  //
  // If set to `true`,
  // [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s are
  // automatically generated based on the hash of the payload, and the IDs may
  // not be consistent across multiple imports. In that case,
  // [ReconciliationMode.FULL][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.ReconciliationMode.FULL]
  // is highly recommended to avoid duplicate contents. If unset or set to
  // `false`, [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s
  // have to be specified using
  // [id_field][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.id_field];
  // otherwise, documents without IDs fail to be imported.
  //
  // Only set this field when using
  // [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource] or
  // [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource], and
  // when
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
  // or
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
  // is `custom` or `csv`. Otherwise, an INVALID_ARGUMENT error is thrown.
  bool auto_generate_ids = 8;

  // The field in the Cloud Storage and BigQuery sources that indicates the
  // unique IDs of the documents.
  //
  // For [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource] it is the
  // key of the JSON field. For instance, `my_id` for JSON `{"my_id":
  // "some_uuid"}`. For
  // [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource] it is
  // the column name of the BigQuery table where the unique IDs are stored.
  //
  // The values of the JSON field or the BigQuery column are used as the
  // [Document.id][google.cloud.discoveryengine.v1beta.Document.id]s. The JSON
  // field or the BigQuery column must be of string type, and the values must be
  // set as valid strings conforming to
  // [RFC-1034](https://tools.ietf.org/html/rfc1034) with 1-63 characters.
  // Otherwise, documents without valid IDs fail to be imported.
  //
  // Only set this field when using
  // [GcsSource][google.cloud.discoveryengine.v1beta.GcsSource] or
  // [BigQuerySource][google.cloud.discoveryengine.v1beta.BigQuerySource], and
  // when
  // [GcsSource.data_schema][google.cloud.discoveryengine.v1beta.GcsSource.data_schema]
  // or
  // [BigQuerySource.data_schema][google.cloud.discoveryengine.v1beta.BigQuerySource.data_schema]
  // is `custom`. Also, only set this field when
  // [auto_generate_ids][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest.auto_generate_ids]
  // is unset or set to `false`. Otherwise, an INVALID_ARGUMENT error is thrown.
  //
  // If it is unset, a default value `_id` is used when importing from the
  // allowed data sources.
  string id_field = 9;
}

// Response of the
// [ImportDocumentsRequest][google.cloud.discoveryengine.v1beta.ImportDocumentsRequest].
// If the long-running operation was successful, this message is returned by
// the google.longrunning.Operation.response field.
message ImportDocumentsResponse {
  // A sample of errors encountered while processing the request.
  repeated google.rpc.Status error_samples = 1;

  // Echoes the destination for the complete errors in the request if set.
  ImportErrorConfig error_config = 2;
}
