// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/explanation.proto";
import "google/cloud/aiplatform/v1beta1/io.proto";
import "google/cloud/aiplatform/v1beta1/machine_resources.proto";
import "google/cloud/aiplatform/v1beta1/model_monitoring_alert.proto";
import "google/type/interval.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "ModelMonitoringSpecProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// Model monitoring job spec. It outlines the specifications for monitoring
// objectives, notifications, and result exports.
message ModelMonitoringSpec {
  // The monitoring objective spec.
  ModelMonitoringObjectiveSpec objective_spec = 1;

  // The model monitoring notification spec.
  ModelMonitoringNotificationSpec notification_spec = 2;

  // The output destination spec for metrics, error logs, etc.
  ModelMonitoringOutputSpec output_spec = 3;
}

// Monitoring objectives spec.
message ModelMonitoringObjectiveSpec {
  // Data drift monitoring spec.
  // Data drift measures the distribution distance between the current dataset
  // and a baseline dataset. A typical use case is to detect data drift between
  // the recent production serving dataset and the training dataset, or to
  // compare the recent production dataset with a dataset from a previous
  // period.
  message DataDriftSpec {
    // Names of the features / prediction outputs to monitor.
    // These should be a subset of the input feature names or prediction output
    // names specified in the monitoring schema.
    // If the field is not specified, all features / prediction outputs
    // outlined in the monitoring schema will be used.
    repeated string features = 1;

    // Metric type for categorical features. Supported metric types:
    //  * l_infinity
    //  * jensen_shannon_divergence
    string categorical_metric_type = 2;

    // Metric type for numeric features. Supported metric types:
    //  * jensen_shannon_divergence
    string numeric_metric_type = 3;

    // Default alert condition for all the categorical features.
    ModelMonitoringAlertCondition default_categorical_alert_condition = 4;

    // Default alert condition for all the numeric features.
    ModelMonitoringAlertCondition default_numeric_alert_condition = 5;

    // Per-feature alert conditions. A per-feature alert condition overrides
    // the default alert condition.
    map<string, ModelMonitoringAlertCondition> feature_alert_conditions = 6;
  }

  // Feature attribution monitoring spec.
  message FeatureAttributionSpec {
    // Names of the features to monitor.
    // These should be a subset of the input feature names specified in the
    // monitoring schema. If the field is not specified, all features outlined
    // in the monitoring schema will be used.
    repeated string features = 1;

    // Default alert condition for all the features.
    ModelMonitoringAlertCondition default_alert_condition = 2;

    // Per-feature alert conditions. A per-feature alert condition overrides
    // the default alert condition.
    map<string, ModelMonitoringAlertCondition> feature_alert_conditions = 3;

    // The config of resources used by the Model Monitoring during the batch
    // explanation for non-AutoML models. If not set, `n1-standard-2` machine
    // type will be used by default.
    BatchDedicatedResources batch_explanation_dedicated_resources = 4;
  }

  // Tabular monitoring objective.
  message TabularObjective {
    // Input feature distribution drift monitoring spec.
    DataDriftSpec feature_drift_spec = 10;

    // Prediction output distribution drift monitoring spec.
    DataDriftSpec prediction_output_drift_spec = 11;

    // Feature attribution monitoring spec.
    FeatureAttributionSpec feature_attribution_spec = 12;
  }

  // The monitoring objective.
  oneof objective {
    // Tabular monitoring objective.
    TabularObjective tabular_objective = 1;
  }

  // The explanation spec.
  // This spec is required when the objectives spec includes feature attribution
  // objectives.
  ExplanationSpec explanation_spec = 3;

  // Baseline dataset.
  // It could be the training dataset or production serving dataset from a
  // previous period.
  ModelMonitoringInput baseline_dataset = 4;

  // Target dataset.
  ModelMonitoringInput target_dataset = 5;
}

// Specification for the export destination of monitoring results, including
// metrics, logs, etc.
message ModelMonitoringOutputSpec {
  // Google Cloud Storage base directory for exported metrics, error logs,
  // etc.
  GcsDestination gcs_base_directory = 1;
}

// Model monitoring data input spec.
message ModelMonitoringInput {
  // Input dataset spec.
  message ModelMonitoringDataset {
    // Dataset spec for data stored in Google Cloud Storage.
    message ModelMonitoringGcsSource {
      // Supported data format.
      enum DataFormat {
        // Data format unspecified, used when this field is unset.
        DATA_FORMAT_UNSPECIFIED = 0;

        // CSV files.
        CSV = 1;

        // TfRecord files.
        TF_RECORD = 2;

        // JsonL files.
        JSONL = 3;
      }

      // Google Cloud Storage URI to the input file(s). May contain
      // wildcards. For more information on wildcards, see
      // https://cloud.google.com/storage/docs/wildcards.
      string gcs_uri = 1;

      // Data format of the dataset.
      DataFormat format = 2;
    }

    // Dataset spec for data stored in BigQuery.
    message ModelMonitoringBigQuerySource {
      // The BigQuery data to read: either a table or a Standard SQL query.
      oneof connection {
        // BigQuery URI to a table, up to 2000 characters long. All the columns
        // in the table will be selected. Accepted forms:
        //
        // *  BigQuery path. For example:
        // `bq://projectId.bqDatasetId.bqTableId`.
        string table_uri = 1;

        // Standard SQL to be used instead of the `table_uri`.
        string query = 2;
      }
    }

    // Choose one of the supported data locations for the columnized dataset.
    oneof data_location {
      // Resource name of the Vertex AI managed dataset.
      string vertex_dataset = 1 [(google.api.resource_reference) = {
        type: "aiplatform.googleapis.com/Dataset"
      }];

      // Google Cloud Storage data source.
      ModelMonitoringGcsSource gcs_source = 2;

      // BigQuery data source.
      ModelMonitoringBigQuerySource bigquery_source = 6;
    }

    // The timestamp field. Usually for serving data.
    string timestamp_field = 7;
  }

  // Data from Vertex AI Batch prediction job output.
  message BatchPredictionOutput {
    // Vertex AI Batch prediction job resource name. The job must match the
    // model version specified in [ModelMonitor].[model_monitoring_target].
    string batch_prediction_job = 1 [(google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/BatchPredictionJob"
    }];
  }

  // Data from Vertex AI Endpoint request response logging.
  message VertexEndpointLogs {
    // List of endpoint resource names. The endpoints must enable the logging
    // with the [Endpoint].[request_response_logging_config], and must contain
    // the deployed model corresponding to the model version specified in
    // [ModelMonitor].[model_monitoring_target].
    repeated string endpoints = 1 [(google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }];
  }

  // Time offset setting.
  message TimeOffset {
    // [offset] is the time difference from the cut-off time.
    // For scheduled jobs, the cut-off time is the scheduled time.
    // For non-scheduled jobs, it's the time when the job was created.
    // Currently the following units are supported:
    // 'w|W': Week, 'd|D': Day, 'h|H': Hour
    // E.g. '1h' stands for 1 hour, '2d' stands for 2 days.
    string offset = 1;

    // [window] refers to the scope of data selected for analysis.
    // It allows you to specify the quantity of data you wish to examine.
    // Currently the following units are supported:
    // 'w|W': Week, 'd|D': Day, 'h|H': Hour
    // E.g. '1h' stands for 1 hour, '2d' stands for 2 days.
    string window = 2;
  }

  // Dataset source.
  oneof dataset {
    // Columnized dataset.
    ModelMonitoringDataset columnized_dataset = 1;

    // Vertex AI Batch prediction Job.
    BatchPredictionOutput batch_prediction_output = 2;

    // Vertex AI Endpoint request & response logging.
    VertexEndpointLogs vertex_endpoint_logs = 3;
  }

  // Time specification for the dataset.
  oneof time_spec {
    // The time interval (pair of start_time and end_time) for which results
    // should be returned.
    google.type.Interval time_interval = 6;

    // The time offset setting for which results should be returned.
    TimeOffset time_offset = 7;
  }
}

// Notification spec (email, notification channel) for model monitoring
// statistics/alerts.
message ModelMonitoringNotificationSpec {
  // The config for email alerts.
  message EmailConfig {
    // The email addresses to send the alerts to.
    repeated string user_emails = 1;
  }

  // Google Cloud Notification Channel config.
  message NotificationChannelConfig {
    // Resource name of the NotificationChannel.
    // Must be of the format
    // `projects/<project_id_or_number>/notificationChannels/<channel_id>`
    string notification_channel = 1;
  }

  // Email alert config.
  EmailConfig email_config = 1;

  // Dump the anomalies to Cloud Logging. The anomalies will be put to json
  // payload encoded from proto
  // [google.cloud.aiplatform.logging.ModelMonitoringAnomaliesLogEntry][].
  // The logs can be further routed to Pub/Sub or any other service supported
  // by Cloud Logging.
  bool enable_cloud_logging = 2;

  // Notification channel configs.
  repeated NotificationChannelConfig notification_channel_configs = 3;
}
