// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package dataform;

option java_package = "com.dataform.protos";
option java_outer_classname = "ConfigsMeta";
option java_multiple_files = true;

option go_package = "github.com/dataform-co/dataform/protos/dataform";

// Workflow Settings defines the contents of the `workflow_settings.yaml`
// configuration file.
//
// Fields marked "Required." are required by convention only: this file uses
// proto3 syntax, which has no `required` label, so the schema itself does not
// enforce their presence.
message WorkflowSettings {
  // The desired Dataform core version to compile against.
  string dataform_core_version = 1;

  // Required. The default Google Cloud project (database).
  string default_project = 2;

  // Required. The default dataset (schema).
  string default_dataset = 3;

  // Required. The default BigQuery location to use. For more information on
  // BigQuery locations, see https://cloud.google.com/bigquery/docs/locations.
  string default_location = 4;

  // Required. The default dataset (schema) for assertions.
  string default_assertion_dataset = 5;

  // Optional. User-defined variables that are made available to project code
  // during compilation, given as "key": value pairs. Both keys and values are
  // strings.
  map<string, string> vars = 6;

  // Optional. The suffix to append to all Google Cloud project references.
  string project_suffix = 7;

  // Optional. The suffix to append to all dataset references.
  string dataset_suffix = 8;

  // Optional. The prefix to prepend to all action names.
  string name_prefix = 9;

  // Optional. Default runtime options for Notebook actions.
  NotebookRuntimeOptionsConfig default_notebook_runtime_options = 10;

  // Optional. The prefix to prepend to built-in assertion names.
  string builtin_assertion_name_prefix = 11;
}

// Action configs defines the contents of `actions.yaml` configuration files.
message ActionConfigs {
  // The actions declared in the configuration file.
  repeated ActionConfig actions = 1;
}

// Action config defines a single action within an `actions.yaml`
// configuration file. The `action` oneof selects the type of the action.
message ActionConfig {
  // Target represents a unique action identifier.
  message Target {
    // The Google Cloud project (database) of the action.
    string project = 1;

    // The dataset (schema) of the action. For notebooks, this is the location.
    string dataset = 2;

    // The name of the action.
    // NOTE(review): field number 3 is not used in this message. If it
    // belonged to a removed field, consider marking it `reserved` - confirm
    // against the file history.
    string name = 4;

    // Flag that, when set, also adds the assertions of this dependency to
    // `dependency_targets`.
    bool include_dependent_assertions = 5;
  }

  // Describes a single column: its path, a text description, and the policy
  // tags and tags to apply to it.
  message ColumnDescriptor {
    // The identifier for the column, using multiple parts for nested records.
    repeated string path = 1;

    // A text description of the column.
    string description = 2;

    // A list of BigQuery policy tags that will be applied to the column.
    repeated string bigquery_policy_tags = 3;

    // A list of tags that will be applied to the column.
    repeated string tags = 4;
  }

  // Options for specifying assertions in shorthand form, usable for some
  // table-based action types.
  message TableAssertionsConfig {
    // Column(s) which constitute the dataset's unique key index.
    // If set, the resulting assertion will fail if there is more than one row
    // in the dataset with the same values for all of these column(s).
    repeated string unique_key = 1;

    // A single combination of column(s) which should constitute a unique key
    // index for the dataset.
    message UniqueKey {
      // The column(s) that make up this unique key.
      repeated string unique_key = 1;
    }
    // Combinations of column(s), each of which should constitute a unique key
    // index for the dataset. If set, the resulting assertion(s) will fail if
    // there is more than one row in the dataset with the same values for all of
    // the column(s) in the unique key(s).
    repeated UniqueKey unique_keys = 2;

    // Column(s) which may never be `NULL`.
    // If set, the resulting assertion will fail if any row contains `NULL`
    // values for these column(s).
    repeated string non_null = 3;

    // General condition(s) which should hold true for all rows in the dataset.
    // If set, the resulting assertion will fail if any row violates any of
    // these condition(s).
    repeated string row_conditions = 4;
  }

  // Configuration of a table action.
  message TableConfig {
    // The name of the table.
    string name = 1;

    // The dataset (schema) of the table.
    string dataset = 2;

    // The Google Cloud project (database) of the table.
    string project = 3;

    // Targets of actions that this action is dependent on.
    repeated Target dependency_targets = 4;

    // Path to the source file that the contents of the action is loaded from.
    string filename = 5;

    // A list of user-defined tags with which the action should be labeled.
    repeated string tags = 6;

    // If set to true, this action will not be executed. However, the action can
    // still be depended upon. Useful for temporarily turning off broken
    // actions.
    bool disabled = 7;

    // Queries to run before `query`. This can be useful for granting
    // permissions.
    repeated string pre_operations = 8;

    // Queries to run after `query`.
    repeated string post_operations = 9;

    // Description of the table.
    string description = 10;

    // Descriptions of columns within the table.
    repeated ColumnDescriptor columns = 11;

    // The key by which to partition the table. Typically the name of a
    // timestamp or date column. See
    // https://cloud.google.com/dataform/docs/partitions-clusters.
    string partition_by = 12;

    // The number of days for which BigQuery stores data in each partition.
    // The setting applies to all partitions in a table, but is calculated
    // independently for each partition based on the partition time.
    int32 partition_expiration_days = 13;

    // Declares whether the partitioned table requires a WHERE clause
    // predicate filter that filters the partitioning column.
    bool require_partition_filter = 14;

    // The keys by which to cluster partitions. See
    // https://cloud.google.com/dataform/docs/partitions-clusters.
    repeated string cluster_by = 15;

    // Key-value pairs for BigQuery labels.
    map<string, string> labels = 16;

    // Key-value pairs of additional options to pass to the BigQuery API. Some
    // options, for example, partitionExpirationDays, have dedicated
    // type/validity checked fields. For such options, use the dedicated fields.
    map<string, string> additional_options = 17;

    // When set to true, assertions dependent upon any dependency will
    // be added as dependencies of this action.
    bool depend_on_dependency_assertions = 18;

    // Assertions to be run on the dataset.
    // If configured, relevant assertions will automatically be created and run
    // as a dependency of this dataset.
    TableAssertionsConfig assertions = 19;

    // If true, this indicates that the action only depends on data from
    // explicitly-declared dependencies. Otherwise if false, it indicates that
    // the action depends on data from a source which has not been declared as
    // a dependency.
    bool hermetic = 20;
  }

  // Configuration of a view action.
  message ViewConfig {
    // The name of the view.
    string name = 1;

    // The dataset (schema) of the view.
    string dataset = 2;

    // The Google Cloud project (database) of the view.
    string project = 3;

    // Targets of actions that this action is dependent on.
    repeated Target dependency_targets = 4;

    // Path to the source file that the contents of the action is loaded from.
    string filename = 5;

    // A list of user-defined tags with which the action should be labeled.
    repeated string tags = 6;

    // If set to true, this action will not be executed. However, the action can
    // still be depended upon. Useful for temporarily turning off broken
    // actions.
    bool disabled = 7;

    // Queries to run before `query`. This can be useful for granting
    // permissions.
    repeated string pre_operations = 8;

    // Queries to run after `query`.
    repeated string post_operations = 9;

    // Applies the materialized view optimization, see
    // https://cloud.google.com/bigquery/docs/materialized-views-intro.
    bool materialized = 10;

    // Description of the view.
    string description = 11;

    // Descriptions of columns within the view.
    repeated ColumnDescriptor columns = 12;

    // Key-value pairs for BigQuery labels.
    map<string, string> labels = 13;

    // Key-value pairs of additional options to pass to the BigQuery API. Some
    // options, for example, partitionExpirationDays, have dedicated
    // type/validity checked fields. For such options, use the dedicated fields.
    map<string, string> additional_options = 14;

    // When set to true, assertions dependent upon any dependency will
    // be added as dependencies of this action.
    bool depend_on_dependency_assertions = 15;

    // If true, this indicates that the action only depends on data from
    // explicitly-declared dependencies. Otherwise if false, it indicates that
    // the action depends on data from a source which has not been declared as
    // a dependency.
    bool hermetic = 16;

    // Assertions to be run on the dataset.
    // If configured, relevant assertions will automatically be created and run
    // as a dependency of this dataset.
    TableAssertionsConfig assertions = 17;
  }

  // The behavior to apply when the columns selected by the query do not match
  // the columns of the target table. Used by
  // `IncrementalTableConfig.on_schema_change`.
  enum OnSchemaChange {
    // Ignore any schema changes (default).
    IGNORE = 0;
    // Fails if the query would result in column(s) being added, deleted, or
    // renamed.
    FAIL = 1;
    // Does not block any new column(s) from being added.
    EXTEND = 2;
    // Does not block column(s) from being added, deleted, or renamed.
    SYNCHRONIZE = 3;
  }

  // Configuration of an incremental table action.
  message IncrementalTableConfig {
    // The name of the incremental table.
    string name = 1;

    // The dataset (schema) of the incremental table.
    string dataset = 2;

    // The Google Cloud project (database) of the incremental table.
    string project = 3;

    // Targets of actions that this action is dependent on.
    repeated Target dependency_targets = 4;

    // Path to the source file that the contents of the action is loaded from.
    string filename = 5;

    // A list of user-defined tags with which the action should be labeled.
    repeated string tags = 6;

    // If set to true, this action will not be executed. However, the action can
    // still be depended upon. Useful for temporarily turning off broken
    // actions.
    bool disabled = 7;

    // Queries to run before `query`. This can be useful for granting
    // permissions.
    repeated string pre_operations = 8;

    // Queries to run after `query`.
    repeated string post_operations = 9;

    // If true, prevents the dataset from being rebuilt from scratch.
    bool protected = 10;

    // If set, unique key represents a set of names of columns that will act as
    // the unique key. To enforce this, when updating the incremental
    // table, Dataform merges rows with `uniqueKey` instead of appending them.
    repeated string unique_key = 11;

    // Description of the incremental table.
    string description = 12;

    // Descriptions of columns within the table.
    repeated ColumnDescriptor columns = 13;

    // The key by which to partition the table. Typically the name of a
    // timestamp or date column. See
    // https://cloud.google.com/dataform/docs/partitions-clusters.
    string partition_by = 14;

    // The number of days for which BigQuery stores data in each partition.
    // The setting applies to all partitions in a table, but is calculated
    // independently for each partition based on the partition time.
    int32 partition_expiration_days = 15;

    // Declares whether the partitioned table requires a WHERE clause
    // predicate filter that filters the partitioning column.
    bool require_partition_filter = 16;

    // SQL-based filter for when incremental updates are applied.
    string update_partition_filter = 17;

    // The keys by which to cluster partitions. See
    // https://cloud.google.com/dataform/docs/partitions-clusters.
    repeated string cluster_by = 18;

    // Key-value pairs for BigQuery labels.
    map<string, string> labels = 19;

    // Key-value pairs of additional options to pass to the BigQuery API. Some
    // options, for example, partitionExpirationDays, have dedicated
    // type/validity checked fields. For such options, use the dedicated fields.
    map<string, string> additional_options = 20;

    // When set to true, assertions dependent upon any dependency will
    // be added as dependencies of this action.
    bool depend_on_dependency_assertions = 21;

    // Assertions to be run on the dataset.
    // If configured, relevant assertions will automatically be created and run
    // as a dependency of this dataset.
    TableAssertionsConfig assertions = 22;

    // If true, this indicates that the action only depends on data from
    // explicitly-declared dependencies. Otherwise if false, it indicates that
    // the action depends on data from a source which has not been declared as
    // a dependency.
    bool hermetic = 23;

    // Defines the action behavior if the selected columns in the query
    // don't match the columns in the target table.
    OnSchemaChange on_schema_change = 24;
  }

  // Configuration of an assertion action.
  message AssertionConfig {
    // The name of the assertion.
    string name = 1;

    // The dataset (schema) of the assertion.
    string dataset = 2;

    // The Google Cloud project (database) of the assertion.
    string project = 3;

    // Targets of actions that this action is dependent on.
    repeated Target dependency_targets = 4;

    // Path to the source file that the contents of the action is loaded from.
    string filename = 5;

    // A list of user-defined tags with which the action should be labeled.
    repeated string tags = 6;

    // If set to true, this action will not be executed. However, the action can
    // still be depended upon. Useful for temporarily turning off broken
    // actions.
    bool disabled = 7;

    // Description of the assertion.
    string description = 8;

    // If true, this indicates that the action only depends on data from
    // explicitly-declared dependencies. Otherwise if false, it indicates that
    // the action depends on data from a source which has not been declared as
    // a dependency.
    bool hermetic = 9;

    // If true, assertions dependent upon any of the dependencies are added as
    // dependencies as well.
    bool depend_on_dependency_assertions = 10;
  }

  // Configuration of an operation action.
  message OperationConfig {
    // The name of the operation.
    string name = 1;

    // The dataset (schema) of the operation.
    string dataset = 2;

    // The Google Cloud project (database) of the operation.
    string project = 3;

    // Targets of actions that this action is dependent on.
    repeated Target dependency_targets = 4;

    // Path to the source file that the contents of the action is loaded from.
    string filename = 5;

    // A list of user-defined tags with which the action should be labeled.
    repeated string tags = 6;

    // If set to true, this action will not be executed. However, the action can
    // still be depended upon. Useful for temporarily turning off broken
    // actions.
    bool disabled = 7;

    // Declares that this action creates a dataset which should be
    // referenceable as a dependency target, for example by using the `ref`
    // function.
    bool has_output = 8;

    // Description of the operation.
    string description = 9;

    // Descriptions of columns within the operation. Can only be set if
    // hasOutput is true.
    repeated ColumnDescriptor columns = 10;

    // When set to true, assertions dependent upon any dependency will
    // be added as dependencies of this action.
    bool depend_on_dependency_assertions = 11;

    // If true, this indicates that the action only depends on data from
    // explicitly-declared dependencies. Otherwise if false, it indicates that
    // the action depends on data from a source which has not been declared as
    // a dependency.
    bool hermetic = 12;
  }

  // Configuration of a declaration action.
  message DeclarationConfig {
    // The name of the declaration.
    string name = 1;

    // The dataset (schema) of the declaration.
    string dataset = 2;

    // The Google Cloud project (database) of the declaration.
    string project = 3;

    // Description of the declaration.
    string description = 4;

    // Descriptions of columns within the declaration.
    repeated ColumnDescriptor columns = 5;
  }

  // Configuration of a notebook action.
  message NotebookConfig {
    // The name of the notebook.
    string name = 1;

    // The Google Cloud location of the notebook.
    string location = 2;

    // The Google Cloud project (database) of the notebook.
    string project = 3;

    // Targets of actions that this action is dependent on.
    repeated Target dependency_targets = 4;

    // Path to the source file that the contents of the action is loaded from.
    string filename = 5;

    // A list of user-defined tags with which the action should be labeled.
    repeated string tags = 6;

    // If set to true, this action will not be executed. However, the action can
    // still be depended upon. Useful for temporarily turning off broken
    // actions.
    bool disabled = 7;

    // Description of the notebook.
    string description = 8;

    // When set to true, assertions dependent upon any dependency will
    // be added as dependencies of this action.
    bool depend_on_dependency_assertions = 9;

    // A notebook runtime field definition could be added here, to allow atomic
    // runtime settings of notebooks.
  }

  // Configuration of a data preparation action.
  //
  // NOTE(review): field numbers 5, 6, 9 and 10 are not used in this message.
  // If they belonged to removed fields, consider marking them `reserved` -
  // confirm against the file history.
  message DataPreparationConfig {
    // The name of the data preparation.
    string name = 1;

    // The dataset (schema) of the destination table.
    string dataset = 13;

    // The Google Cloud project (database) of the destination table.
    string project = 14;

    // Targets of actions that this action is dependent on.
    repeated Target dependency_targets = 2;

    // Path to the source file that the contents of the action is loaded from.
    string filename = 3;

    // A list of user-defined tags with which the action should be labeled.
    repeated string tags = 4;

    // If set to true, this action will not be executed. However, the action can
    // still be depended upon. Useful for temporarily turning off broken
    // actions.
    bool disabled = 7;

    // Description of the data preparation.
    string description = 8;

    // Identifies the error table of the data preparation.
    // NOTE(review): presumably the table that receives rows which fail
    // processing - confirm.
    ErrorTableConfig error_table = 11;

    // How data is loaded into the destination table. See `LoadMode` for the
    // available modes.
    LoadModeConfig load_mode = 12;

    // Identifies an error table and its retention setting.
    message ErrorTableConfig {
      // The name of the error table.
      string name = 1;

      // The dataset (schema) of the error table.
      string dataset = 2;

      // The Google Cloud project (database) of the error table.
      string project = 3;

      // NOTE(review): presumably the number of days for which data in the
      // error table is retained - confirm.
      int32 retention_days = 4;
    }
  }

  // The type-specific configuration of this action. As members of a oneof,
  // at most one of these fields can be set at a time.
  oneof action {
    // Configuration of a table action.
    TableConfig table = 1;
    // Configuration of a view action.
    ViewConfig view = 2;
    // Configuration of an incremental table action.
    IncrementalTableConfig incremental_table = 3;
    // Configuration of an assertion action.
    AssertionConfig assertion = 4;
    // Configuration of an operation action.
    OperationConfig operation = 5;
    // Configuration of a declaration action.
    DeclarationConfig declaration = 6;
    // Configuration of a notebook action.
    NotebookConfig notebook = 7;
    // Configuration of a data preparation action.
    DataPreparationConfig data_preparation = 8;
  }

  // Configures how data is loaded into the destination table. Used by
  // `DataPreparationConfig.load_mode`.
  message LoadModeConfig {
    // The load mode to use.
    LoadMode mode = 1;
    // Required when mode is MAXIMUM or UNIQUE.
    // NOTE(review): presumably this names the "specified column" that those
    // modes compare against the destination table - confirm.
    string incremental_column = 2;
  }

  // The available modes for loading data into the destination table.
  enum LoadMode {
    // Replace existing table (default).
    REPLACE_TABLE = 0;
    // Insert into destination table.
    APPEND = 1;
    // Insert only records where the specified column value exceeds the existing
    // maximum value in the destination table.
    MAXIMUM = 2;
    // Insert only records where the specified column value is not already
    // present in the destination column values.
    UNIQUE = 3;
  }
}

// Runtime options for notebook actions. Used as the type of
// `WorkflowSettings.default_notebook_runtime_options`.
message NotebookRuntimeOptionsConfig {
  // Where notebook output is written. As members of a oneof, at most one
  // sink can be set at a time.
  oneof output_sink {
    // Storage bucket to output notebooks to after their execution.
    string output_bucket = 1;
  }

  // Colab runtime template (https://cloud.google.com/colab/docs/runtimes), from
  // which a runtime is created for notebook executions.
  string runtime_template_name = 2;
}
