// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/machine_resources.proto";
import "google/protobuf/timestamp.proto";

// Per-language code generation options: the namespace/package (and, for Java,
// the outer class name and one-file-per-message layout) used for the code
// generated from this file.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "FeatureViewProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// FeatureView is a representation of the values that the FeatureOnlineStore
// will serve based on its syncConfig.
message FeatureView {
  // Declares FeatureView as an API resource and gives the pattern of its
  // resource name.
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/FeatureView"
    pattern: "projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}"
  };

  // A BigQuery source for a FeatureView: the view to materialize and the
  // columns used to construct entity IDs / row keys.
  message BigQuerySource {
    // Required. The BigQuery view URI that will be materialized on each sync
    // trigger based on FeatureView.SyncConfig.
    string uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Columns to construct entity_id / row keys.
    repeated string entity_id_columns = 2
        [(google.api.field_behavior) = REQUIRED];
  }

  // Configuration for Sync. Only one option is set.
  //
  // Note: `cron` and `continuous` are declared as independent fields rather
  // than as members of a oneof, so this message definition does not itself
  // enforce that only one of them is set.
  message SyncConfig {
    // Cron schedule (https://en.wikipedia.org/wiki/Cron) to launch scheduled
    // runs. To explicitly set a timezone to the cron tab, apply a prefix in
    // the cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or "TZ=${IANA_TIME_ZONE}".
    // The ${IANA_TIME_ZONE} may only be a valid string from the IANA time zone
    // database. For example, "CRON_TZ=America/New_York 1 * * * *", or
    // "TZ=America/New_York 1 * * * *".
    string cron = 1;

    // Optional. If true, syncs the FeatureView in a continuous manner to Online
    // Store.
    bool continuous = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Configuration for vector indexing.
  message IndexConfig {
    // Configuration options for using brute force search. This message
    // currently declares no fields.
    message BruteForceConfig {}

    // Configuration options for the tree-AH algorithm.
    message TreeAHConfig {
      // Optional. Number of embeddings on each leaf node. The default value is
      // 1000 if not set.
      optional int64 leaf_node_embedding_count = 1
          [(google.api.field_behavior) = OPTIONAL];
    }

    // The distance measure used in nearest neighbor search.
    enum DistanceMeasureType {
      // Should not be set.
      DISTANCE_MEASURE_TYPE_UNSPECIFIED = 0;

      // Euclidean (L_2) Distance.
      SQUARED_L2_DISTANCE = 1;

      // Cosine Distance. Defined as 1 - cosine similarity.
      //
      // We strongly suggest using DOT_PRODUCT_DISTANCE + UNIT_L2_NORM instead
      // of COSINE distance. Our algorithms have been more optimized for
      // DOT_PRODUCT distance which, when combined with UNIT_L2_NORM, is
      // mathematically equivalent to COSINE distance and results in the same
      // ranking.
      COSINE_DISTANCE = 2;

      // Dot Product Distance. Defined as a negative of the dot product.
      DOT_PRODUCT_DISTANCE = 3;
    }

    // The configuration with regard to the algorithms used for efficient
    // search.
    oneof algorithm_config {
      // Optional. Configuration options for the tree-AH algorithm (Shallow tree
      // + Asymmetric Hashing). Please refer to this paper for more details:
      // https://arxiv.org/abs/1908.10396
      TreeAHConfig tree_ah_config = 6 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Configuration options for using brute force search, which
      // simply implements the standard linear search in the database for each
      // query. It is primarily meant for benchmarking and to generate the
      // ground truth for approximate search.
      BruteForceConfig brute_force_config = 7
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Optional. Column of embedding. This column contains the source data to
    // create index for vector search. embedding_column must be set when using
    // vector search.
    string embedding_column = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Columns of features that are used to filter vector search
    // results.
    repeated string filter_columns = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Column of crowding. This column contains crowding attribute
    // which is a constraint on a neighbor list produced by
    // [FeatureOnlineStoreService.SearchNearestEntities][google.cloud.aiplatform.v1.FeatureOnlineStoreService.SearchNearestEntities]
    // to diversify search results. If
    // [NearestNeighborQuery.per_crowding_attribute_neighbor_count][google.cloud.aiplatform.v1.NearestNeighborQuery.per_crowding_attribute_neighbor_count]
    // is set to K in
    // [SearchNearestEntitiesRequest][google.cloud.aiplatform.v1.SearchNearestEntitiesRequest],
    // it's guaranteed that no more than K entities of the same crowding
    // attribute are returned in the response.
    string crowding_column = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of dimensions of the input embedding.
    optional int32 embedding_dimension = 4
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The distance measure used in nearest neighbor search.
    DistanceMeasureType distance_measure_type = 5
        [(google.api.field_behavior) = OPTIONAL];
  }

  // A Feature Registry source for features that need to be synced to Online
  // Store.
  message FeatureRegistrySource {
    // Features belonging to a single feature group that will be
    // synced to Online Store.
    message FeatureGroup {
      // Required. Identifier of the feature group.
      string feature_group_id = 1 [(google.api.field_behavior) = REQUIRED];

      // Required. Identifiers of features under the feature group.
      repeated string feature_ids = 2 [(google.api.field_behavior) = REQUIRED];
    }

    // Required. List of features that need to be synced to Online Store.
    repeated FeatureGroup feature_groups = 1
        [(google.api.field_behavior) = REQUIRED];

    // Optional. The project number of the parent project of the Feature Groups.
    optional int64 project_number = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // A Vertex RAG source for features that need to be synced to Online
  // Store.
  message VertexRagSource {
    // Required. The BigQuery view/table URI that will be materialized on each
    // manual sync trigger. The table/view is expected to have the following
    // columns and types at least:
    //  - `corpus_id` (STRING, NULLABLE/REQUIRED)
    //  - `file_id` (STRING, NULLABLE/REQUIRED)
    //  - `chunk_id` (STRING, NULLABLE/REQUIRED)
    //  - `chunk_data_type` (STRING, NULLABLE/REQUIRED)
    //  - `chunk_data` (STRING, NULLABLE/REQUIRED)
    //  - `embeddings` (FLOAT, REPEATED)
    //  - `file_original_uri` (STRING, NULLABLE/REQUIRED)
    string uri = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. The RAG corpus id corresponding to this FeatureView.
    int64 rag_corpus_id = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Configuration for FeatureViews created in Optimized FeatureOnlineStore.
  message OptimizedConfig {
    // Optional. A description of resources that the FeatureView uses, which to
    // a large degree are decided by Vertex AI, and optionally allows only a
    // modest additional configuration. If min_replica_count is not set, the
    // default value is 2. If max_replica_count is not set, the default value
    // is 6. The max allowed replica count is 1000.
    AutomaticResources automatic_resources = 7
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Service agent type used during data sync.
  enum ServiceAgentType {
    // By default, the project-level Vertex AI Service Agent is enabled.
    SERVICE_AGENT_TYPE_UNSPECIFIED = 0;

    // Indicates the project-level Vertex AI Service Agent
    // (https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents)
    // will be used during sync jobs.
    SERVICE_AGENT_TYPE_PROJECT = 1;

    // Enable a FeatureView service account to be created by Vertex AI and
    // output in the field `service_account_email`. This service account will
    // be used to read from the source BigQuery table during sync.
    SERVICE_AGENT_TYPE_FEATURE_VIEW = 2;
  }

  // The source of the data for this FeatureView. As members of a oneof, at
  // most one of the following source fields can be set at a time.
  oneof source {
    // Optional. Configures how data is supposed to be extracted from a BigQuery
    // source to be loaded onto the FeatureOnlineStore.
    BigQuerySource big_query_source = 6
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Configures the features from a Feature Registry source that
    // need to be loaded onto the FeatureOnlineStore.
    FeatureRegistrySource feature_registry_source = 9
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The Vertex RAG Source that the FeatureView is linked to.
    VertexRagSource vertex_rag_source = 18
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Identifier. Name of the FeatureView. Format:
  // `projects/{project}/locations/{location}/featureOnlineStores/{feature_online_store}/featureViews/{feature_view}`
  string name = 1 [(google.api.field_behavior) = IDENTIFIER];

  // Output only. Timestamp when this FeatureView was created.
  google.protobuf.Timestamp create_time = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Timestamp when this FeatureView was last updated.
  google.protobuf.Timestamp update_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Used to perform consistent read-modify-write updates. If not set,
  // a blind "overwrite" update happens.
  string etag = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata to organize your
  // FeatureViews.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information on and examples of labels.
  // No more than 64 user labels can be associated with one
  // FeatureView (System labels are excluded). System reserved label keys
  // are prefixed with "aiplatform.googleapis.com/" and are immutable.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];

  // Configures when data is to be synced/updated for this FeatureView. At the
  // end of the sync the latest featureValues for each entityId of this
  // FeatureView are made ready for online serving.
  SyncConfig sync_config = 7;

  // Optional. Configuration for index preparation for vector search. It
  // contains the required configurations to create an index from source data,
  // so that approximate nearest neighbor (a.k.a. ANN) algorithms search can be
  // performed during online serving.
  IndexConfig index_config = 15 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration for FeatureView created under Optimized
  // FeatureOnlineStore.
  OptimizedConfig optimized_config = 16
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Service agent type used during data sync. By default, the Vertex
  // AI Service Agent is used. When using an IAM Policy to isolate this
  // FeatureView within a project, a separate service account should be
  // provisioned by setting this field to `SERVICE_AGENT_TYPE_FEATURE_VIEW`.
  // This will generate a separate service account to access the BigQuery source
  // table.
  ServiceAgentType service_agent_type = 14
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. A Service Account unique to this FeatureView. The role
  // bigquery.dataViewer should be granted to this service account to allow
  // Vertex AI Feature Store to sync data to the online store.
  string service_account_email = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Reserved for future use.
  bool satisfies_pzs = 19 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Reserved for future use.
  bool satisfies_pzi = 20 [(google.api.field_behavior) = OUTPUT_ONLY];
}
