// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/content.proto";

// Language-specific code generation options: they set the namespace/package
// (and, for Java, the outer class name and file layout) of the code generated
// from this file.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "DataFoundryServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Service for generating and preparing datasets for Gen AI evaluation.
service DataFoundryService {
  // Default endpoint that generated clients use for this service.
  option (google.api.default_host) = "aiplatform.googleapis.com";
  // OAuth scope declared for calling this service.
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Generates synthetic data based on the provided configuration.
  //
  // The number of examples, the output schema, the generation strategy and
  // any few-shot examples are supplied in `GenerateSyntheticDataRequest`; the
  // generated examples are returned in `GenerateSyntheticDataResponse`.
  rpc GenerateSyntheticData(GenerateSyntheticDataRequest)
      returns (GenerateSyntheticDataResponse) {
    // HTTP mapping: the `location` request field is taken from the URL path,
    // and all remaining request fields are read from the request body.
    option (google.api.http) = {
      post: "/v1/{location=projects/*/locations/*}:generateSyntheticData"
      body: "*"
    };
  }
}

// Request message for DataFoundryService.GenerateSyntheticData.
message GenerateSyntheticDataRequest {
  // The generation strategy to use. At most one member of this oneof can be
  // set; `task_description` is currently the only strategy defined.
  oneof strategy {
    // Generate data from a high-level task description.
    TaskDescriptionStrategy task_description = 3;
  }

  // Required. The resource name of the Location to run the job.
  // Format: `projects/{project}/locations/{location}`
  string location = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The number of synthetic examples to generate.
  // For this stateless API, the count is limited to a small number.
  int32 count = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The schema of the desired output, defined by a list of fields.
  // Each entry is an `OutputFieldSpec` that names one output field and may
  // carry field-specific guidance and a data type.
  repeated OutputFieldSpec output_field_specs = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of few-shot examples to guide the model's output style
  // and format. Each entry is a `SyntheticExample`, the same message that is
  // used to return generated examples in the response.
  repeated SyntheticExample examples = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// Represents a single named field within a SyntheticExample: an optional
// field name paired with the field's content.
message SyntheticField {
  // Optional. The name of the field.
  string field_name = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. The content of the field. The `Content` message is not declared
  // in this file; it comes from the imported
  // `google/cloud/aiplatform/v1/content.proto`.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];
}

// Represents a single synthetic example, composed of multiple fields.
// Used both for the few-shot examples supplied in
// `GenerateSyntheticDataRequest.examples` and for the generated examples
// returned in `GenerateSyntheticDataResponse.synthetic_examples`.
message SyntheticExample {
  // Required. The list of fields that make up this example.
  repeated SyntheticField fields = 1 [(google.api.field_behavior) = REQUIRED];
}

// Defines a specification for a single output field: its name, optional
// generation guidance, and its data type.
message OutputFieldSpec {
  // The data type of the field.
  enum FieldType {
    // Field type is unspecified. This is the zero value, so it is what an
    // unset `field_type` reads as.
    FIELD_TYPE_UNSPECIFIED = 0;

    // Arbitrary content field type.
    CONTENT = 1;

    // Text field type.
    TEXT = 2;

    // Image field type.
    IMAGE = 3;

    // Audio field type.
    AUDIO = 4;
  }

  // Required. The name of the output field.
  string field_name = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Recommended: additional guidance specific to this field, giving
  // the LLM targeted instructions for generating the content of this single
  // output field. While the LLM can sometimes infer content from the field
  // name, providing explicit guidance is preferred.
  string guidance = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The data type of the field. Defaults to CONTENT if not set
  // (that is, when the value is left as FIELD_TYPE_UNSPECIFIED).
  FieldType field_type = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Defines a generation strategy based on a high-level task description.
// Set as the `task_description` member of the `strategy` oneof in
// `GenerateSyntheticDataRequest`.
message TaskDescriptionStrategy {
  // Required. A high-level description of the synthetic data to be generated.
  string task_description = 1 [(google.api.field_behavior) = REQUIRED];
}

// Response message for DataFoundryService.GenerateSyntheticData, containing
// the generated data.
message GenerateSyntheticDataResponse {
  // A list of generated synthetic examples. Each example is a
  // `SyntheticExample` made up of named fields.
  repeated SyntheticExample synthetic_examples = 1;
}
