// Copyright 2018 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.bigquery.storage.v1beta1;

import "google/cloud/bigquery/storage/v1beta1/avro.proto";
import "google/cloud/bigquery/storage/v1beta1/read_options.proto";
import "google/cloud/bigquery/storage/v1beta1/table_reference.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "google.golang.org/genproto/googleapis/cloud/bigquery/storage/v1beta1;storage";
option java_package = "com.google.cloud.bigquery.storage.v1beta1";


// BigQuery storage API.
//
// Provides read access to data stored in BigQuery.
service BigQueryStorage {
  // Creates a new read session.
  //
  // A read session divides the contents of a BigQuery table into one or more
  // streams, which can then be used to read data from the table. The session
  // also specifies properties of the data to be read, such as a list of
  // columns or a push-down filter describing the rows to be returned.
  //
  // Any given row can be read by at most one stream. Once the caller has
  // reached the end of every stream in the session, all the data in the table
  // has been read.
  //
  // Read sessions expire automatically 24 hours after they are created; no
  // manual clean-up by the caller is required.
  rpc CreateReadSession(CreateReadSessionRequest) returns (ReadSession);

  // Reads rows from the table in the format prescribed by the read session.
  //
  // Each response contains one or more table rows, up to a maximum of 10 MiB
  // per response. Read requests that attempt to read individual rows larger
  // than this limit will fail.
  //
  // Each request additionally returns a set of stream statistics reflecting
  // the estimated total number of rows in the read stream. The estimate is
  // computed from the total table size and the number of active streams in
  // the read session, and may change as other streams continue to read data.
  rpc ReadRows(ReadRowsRequest) returns (stream ReadRowsResponse);

  // Creates additional streams for a ReadSession.
  //
  // This API can be used to dynamically adjust the parallelism of a batch
  // processing task upwards by adding additional workers.
  rpc BatchCreateReadSessionStreams(BatchCreateReadSessionStreamsRequest)
      returns (BatchCreateReadSessionStreamsResponse);

  // Triggers the graceful termination of a single stream in a ReadSession.
  //
  // This API can be used to dynamically adjust the parallelism of a batch
  // processing task downwards without losing data.
  //
  // The stream is not deleted by this call: it remains visible in the
  // ReadSession, and any data processed by the stream is not released to
  // other streams. However, once this call completes no additional data will
  // be assigned to the stream. Callers must keep reading from the stream
  // until its end is reached, so that the data already assigned to it is
  // processed.
  //
  // This method will return an error if there are no other live streams in
  // the Session, or if SplitReadStream() has been called on the given Stream.
  rpc FinalizeStream(FinalizeStreamRequest) returns (google.protobuf.Empty);

  // Splits a given read stream into two Streams.
  //
  // The two streams are referred to as the primary and the residual of the
  // split (returned as `primary_stream` and `remainder_stream` in
  // SplitReadStreamResponse). The original stream can still be read from in
  // the same manner as before. Both of the returned streams can also be read
  // from, and the total rows returned by both child streams will be the same
  // as the rows read from the original stream.
  //
  // Moreover, the two child streams will be allocated back to back in the
  // original Stream. Concretely, it is guaranteed that for streams Original,
  // Primary, and Residual, that Original[0-j] = Primary[0-j] and
  // Original[j-n] = Residual[0-m] once the streams have been read to
  // completion.
  //
  // This method is guaranteed to be idempotent.
  rpc SplitReadStream(SplitReadStreamRequest)
      returns (SplitReadStreamResponse);
}

// Information about a single data stream within a read session.
message Stream {
  // NOTE(review): the documented form below has a leading slash and a singular
  // `stream` segment, whereas `ReadSession.name` is documented as
  // `projects/{project_id}/sessions/{session_id}` -- confirm which form the
  // service actually returns.

  // Name of the stream, in the form
  // `/projects/{project_id}/stream/{stream_id}`.
  string name = 1;

  // Number of rows in the stream.
  int64 row_count = 2;
}

// Expresses a point within a given stream using an offset position.
//
// `ReadRowsRequest.read_position` uses this message to say where reading
// should start.
message StreamPosition {
  // Identifies the Stream that the position refers to.
  Stream stream = 1;

  // Offset of the position within the stream.
  int64 offset = 2;
}

// Information returned from a `CreateReadSession` request.
message ReadSession {
  // NOTE(review): field numbers 3 and 6 are not used in this message. If they
  // belonged to fields that were removed, they should be marked `reserved` so
  // they cannot be reused -- confirm against the schema history.

  // Unique identifier for the session, in the form
  // `projects/{project_id}/sessions/{session_id}`.
  string name = 1;

  // Time at which the session becomes invalid. After this time, subsequent
  // requests to read this Session will return errors.
  google.protobuf.Timestamp expire_time = 2;

  // The schema for the read. If read_options.selected_fields is set, the
  // schema may differ from the table schema, as it will contain only the
  // selected fields.
  oneof schema {
    // Avro schema.
    AvroSchema avro_schema = 5;
  }

  // Streams associated with this session.
  repeated Stream streams = 4;

  // Table that this ReadSession is reading from.
  TableReference table_reference = 7;

  // Any modifiers which are applied when reading from the specified table.
  TableModifiers table_modifiers = 8;
}

// Creates a new read session, which may include additional options such as
// requested parallelism, projection filters and constraints.
message CreateReadSessionRequest {
  // Required. Reference to the table to read.
  TableReference table_reference = 1;

  // Required. String of the form "projects/your-project-id" indicating the
  // project this ReadSession is associated with. This is the project that will
  // be billed for usage.
  string parent = 6;

  // Optional. Any modifiers to the Table (e.g. snapshot timestamp).
  TableModifiers table_modifiers = 2;

  // Optional. Initial number of streams. Must be non-negative. If unset or 0,
  // the system will provide a number of streams chosen to produce reasonable
  // throughput. The number of streams may be lower than the requested number,
  // depending on the amount of parallelism that is reasonable for the table
  // and the maximum amount of parallelism allowed by the system.
  //
  // Streams must be read starting from offset 0.
  int32 requested_streams = 3;

  // Optional. Read options for this session (e.g. column selection, filters).
  TableReadOptions read_options = 4;

  // Data output format. Currently defaults to Avro.
  DataFormat format = 5;
}

// Request for row data via `ReadRows`. The request must provide Stream
// position information.
message ReadRowsRequest {
  // Required. Identifier of the position in the stream to start reading from.
  // The offset requested must be less than the last row read from ReadRows.
  // Requesting a larger offset is undefined.
  StreamPosition read_position = 1;
}

// Progress information for a given Stream. Carried in
// `ReadRowsResponse.status`.
message StreamStatus {
  // Estimated number of rows in the current stream. May change over time as
  // different readers in the stream progress at rates which are relatively
  // fast or slow.
  int64 estimated_row_count = 1;
}

// Information on whether the current connection is being throttled. Carried
// in `ReadRowsResponse.throttle_status`.
message ThrottleStatus {
  // How much this connection is being throttled, as a percentage:
  // 0 is no throttling, 100 is completely throttled.
  int32 throttle_percent = 1;
}

// Response from calling `ReadRows`. May include row data, progress and
// throttling information.
message ReadRowsResponse {
  // NOTE(review): field numbers 1 and 4 are not used in this message. If they
  // belonged to fields that were removed, they should be marked `reserved` so
  // they cannot be reused -- confirm against the schema history.

  // Row data, returned in the format specified during session creation.
  oneof rows {
    // Serialized row data in Avro format.
    AvroRows avro_rows = 3;
  }

  // Estimated stream statistics.
  StreamStatus status = 2;

  // Throttling status. If unset, the latest response still describes
  // the current throttling status.
  ThrottleStatus throttle_status = 5;
}

// Information needed to request additional streams for an established read
// session via `BatchCreateReadSessionStreams`.
message BatchCreateReadSessionStreamsRequest {
  // Required. Must be a non-expired session obtained from a call to
  // CreateReadSession. Only the name field needs to be set.
  ReadSession session = 1;

  // Required. Number of new streams requested. Must be positive.
  // The number of streams actually added may be less than this; see
  // CreateReadSessionRequest for more information.
  int32 requested_streams = 2;
}

// Response from `BatchCreateReadSessionStreams`, carrying the stream
// identifiers for the newly created streams.
message BatchCreateReadSessionStreamsResponse {
  // Newly added streams.
  repeated Stream streams = 1;
}

// Request information for invoking `FinalizeStream`.
message FinalizeStreamRequest {
  // NOTE(review): the only field uses number 2 and number 1 is unused. If 1
  // belonged to a removed field it should be marked `reserved` -- confirm
  // against the schema history.

  // Stream to finalize.
  Stream stream = 2;
}

// Request information for invoking `SplitReadStream`.
message SplitReadStreamRequest {
  // The stream to split. The `SplitReadStream` RPC documentation refers to
  // this stream as the original stream.
  Stream original_stream = 1;
}

// Response from `SplitReadStream`, carrying the two streams produced by the
// split.
message SplitReadStreamResponse {
  // Primary stream. Will contain the beginning portion of
  // `SplitReadStreamRequest.original_stream`.
  Stream primary_stream = 1;

  // Remainder stream (called the "residual" in the `SplitReadStream` RPC
  // documentation). Will contain the tail of
  // `SplitReadStreamRequest.original_stream`.
  Stream remainder_stream = 2;
}

// Data format for input or output data. Used by
// `CreateReadSessionRequest.format` to select the output format of a session.
enum DataFormat {
  // Data format is unspecified.
  DATA_FORMAT_UNSPECIFIED = 0;

  // Avro is a standard open source row-based file format.
  // See https://avro.apache.org/ for more details.
  AVRO = 1;
}
