// Copyright 2025 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.chromeos.uidetection.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "google.golang.org/genproto/googleapis/chromeos/uidetection/v1;uidetection";
option java_multiple_files = true;
option java_outer_classname = "UiDetectionProto";
option java_package = "com.google.chromeos.uidetection.v1";

// Provides an image-based UI detection service.
service UiDetectionService {
  option (google.api.default_host) = "chromeosuidetection.googleapis.com";

  // Runs the detection described by the request and returns the result.
  rpc ExecuteDetection(UiDetectionRequest) returns (UiDetectionResponse) {
    // NOTE(review): this binding uses HTTP GET and declares no `body`, while
    // UiDetectionRequest carries a `bytes` PNG image. Confirm that GET is
    // intended for REST callers; changing the verb would alter the published
    // HTTP surface, so it is left as-is here.
    option (google.api.http) = {
      get: "/v1/executeDetection:execute"
    };
  }
}

// Request message for UI detection.
message UiDetectionRequest {
  // Required. The image, encoded as PNG.
  bytes image_png = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The detection type, which specifies what to detect in the image.
  DetectionRequest request = 2 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether to fall back to resizing the image if no elements are
  // detected.
  optional bool resize_image = 3;

  // Deprecated as of 2023-03-29. Use `test_metadata` instead.
  string test_id = 4 [deprecated = true];

  // Optional. Metadata about the client, used for analytics.
  TestMetadata test_metadata = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates whether to always start by resizing the image.
  bool force_image_resizing = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates whether to include the transformed image PNG in the
  // response.
  bool return_transformed_image = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies the detection type, i.e. what to detect in the image.
message DetectionRequest {
  // The kind of detection to run. As a oneof, at most one of the members
  // below can be set at a time.
  oneof detection_request_type {
    // Detection type for word detection.
    WordDetectionRequest word_detection_request = 1;

    // Detection type for text block detection.
    TextBlockDetectionRequest text_block_detection_request = 2;

    // Detection type for custom icon detection.
    CustomIconDetectionRequest custom_icon_detection_request = 3;
  }
}

// Metadata about the client test and the test device.
message TestMetadata {
  // Name of the calling test. For example, 'tast.uidetection.BasicDetections'.
  string test_id = 1;

  // Board name of the ChromeOS device under test. For example, 'volteer'.
  string board = 2;

  // Model name of the ChromeOS device under test. For example, 'volet'.
  string model = 3;

  // ChromeOS build of the device under test.
  // For example, 'volteer-release/R110-15275.0.0-75031-8794956681263330561'.
  string cros_build = 4;
}

// Detection type for word detection.
message WordDetectionRequest {
  // Required. The word to locate in the image.
  string word = 1 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether the query string is a regex.
  bool regex_mode = 2;

  // NOTE(review): the polarity below is inferred from the field name (the
  // earlier comment described it as "is an approximate match"); confirm that
  // `true` turns approximate matching off.

  // Indicates whether approximate matching is disabled for the detection.
  bool disable_approx_match = 3;

  // Levenshtein distance threshold.
  // Applicable only if `regex_mode` is false.
  optional int32 max_edit_distance = 4;
}

// Detection type for text block detection.
message TextBlockDetectionRequest {
  // Required. The text block, consisting of a list of words, to locate in the
  // image.
  repeated string words = 1 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether the query string is a regex.
  bool regex_mode = 2;

  // NOTE(review): the polarity below is inferred from the field name (the
  // earlier comment described it as "is an approximate match"); confirm that
  // `true` turns approximate matching off.

  // Indicates whether approximate matching is disabled for the detection.
  bool disable_approx_match = 3;

  // Levenshtein distance threshold.
  // Applicable only if `regex_mode` is false.
  optional int32 max_edit_distance = 4;

  // Indicates whether the detection result should only contain the specified
  // words.
  bool specified_words_only = 5;
}

// Detection type for custom icon detection.
message CustomIconDetectionRequest {
  // Required. The icon, in PNG format.
  bytes icon_png = 1 [(google.api.field_behavior) = REQUIRED];

  // Limit on the number of matches. Set to -1 to not limit the number of
  // matches.
  int32 match_count = 2;

  // Confidence threshold in the range [0.0, 1.0]. Matches below this
  // threshold are considered non-existent.
  double min_confidence_threshold = 3;
}

// Response message for UI detection.
message UiDetectionResponse {
  // Locations of the matching UI elements.
  repeated BoundingBox bounding_boxes = 1;

  // The transformed detection image, as PNG, if it was requested and
  // transformations were applied.
  bytes transformed_image_png = 2;

  // The factor by which the original image was scaled to produce the
  // transformed image. 1.0 if the detection result is not based on a resized
  // image.
  float resizing_scale_factor = 3;
}

// The location of a UI element.
// A bounding box is represented by its top-left point [left, top]
// and its bottom-right point [right, bottom].
message BoundingBox {
  // The text found in the bounding box.
  string text = 1;

  // The y-coordinate of the top-left point.
  int32 top = 2;

  // The x-coordinate of the top-left point.
  int32 left = 3;

  // The y-coordinate of the bottom-right point.
  int32 bottom = 4;

  // The x-coordinate of the bottom-right point.
  int32 right = 5;
}
