// xref: /aosp_15_r20/external/googleapis/google/chromeos/uidetection/v1/ui_detection.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.chromeos.uidetection.v1;

// google.api extensions used in this file: the `http` method option, the
// `default_host` service option and the `field_behavior` field option.
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

// Code generation options for Go and Java.
option go_package = "google.golang.org/genproto/googleapis/chromeos/uidetection/v1;uidetection";
option java_multiple_files = true;
option java_outer_classname = "UiDetectionProto";
option java_package = "com.google.chromeos.uidetection.v1";

// Provides image-based UI detection service.
service UiDetectionService {
  option (google.api.default_host) = "chromeosuidetection.googleapis.com";

  // Runs the detection described by `UiDetectionRequest.request` on the PNG
  // image carried in the request and returns the locations of the matching UI
  // elements.
  //
  // The HTTP binding is a `get` with no `body`; under the google.api.http
  // transcoding rules, request fields that are not bound by the URL path are
  // carried as URL query parameters.
  rpc ExecuteDetection(UiDetectionRequest) returns (UiDetectionResponse) {
    option (google.api.http) = {
      get: "/v1/executeDetection:execute"
    };
  }
}

// Request message for UI detection.
message UiDetectionRequest {
  // Required. The image to run the detection on, encoded as PNG.
  bytes image_png = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The detection type, i.e. what to look for in the image.
  DetectionRequest request = 2 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether to fall back to resizing the image if no elements are
  // detected. Declared `optional`, so an unset value can be told apart from
  // an explicit `false`.
  optional bool resize_image = 3;

  // Deprecated as of 2023-03-29. Use test_metadata instead.
  string test_id = 4 [deprecated = true];

  // Optional. Metadata about the client for analytics.
  TestMetadata test_metadata = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates whether to always start by resizing the image.
  bool force_image_resizing = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates whether to respond with the transformed image png.
  bool return_transformed_image = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Detection type specifies what to detect in the image.
message DetectionRequest {
  // The kind of detection to run. Being a oneof, at most one of the members
  // below is set at a time.
  oneof detection_request_type {
    // Detection type for word detection.
    WordDetectionRequest word_detection_request = 1;

    // Detection type for text block detection.
    TextBlockDetectionRequest text_block_detection_request = 2;

    // Detection type for custom icon detection.
    CustomIconDetectionRequest custom_icon_detection_request = 3;
  }
}

// Metadata about the client test and test device.
message TestMetadata {
  // Name of the calling test. For example, 'tast.uidetection.BasicDetections'.
  string test_id = 1;

  // Board name of the ChromeOS device under test. For example, 'volteer'.
  string board = 2;

  // Model name of the ChromeOS device under test. For example, 'volet'.
  string model = 3;

  // ChromeOS build of the device under test.
  // For example, 'volteer-release/R110-15275.0.0-75031-8794956681263330561'.
  string cros_build = 4;
}

// Detection type for word detection.
message WordDetectionRequest {
  // Required. The word to locate in the image.
  string word = 1 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether the query string is a regex or not.
  bool regex_mode = 2;

  // Indicates whether approximate matching is disabled for this detection
  // (note the negated sense of the flag).
  bool disable_approx_match = 3;

  // Levenshtein distance threshold.
  // Applicable only if regex_mode is False.
  optional int32 max_edit_distance = 4;
}

// Detection type for text block detection.
message TextBlockDetectionRequest {
  // Required. The text block, given as a list of words, to locate in the
  // image.
  repeated string words = 1 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether the query string is a regex or not.
  bool regex_mode = 2;

  // Indicates whether approximate matching is disabled for this detection
  // (note the negated sense of the flag).
  bool disable_approx_match = 3;

  // Levenshtein distance threshold.
  // Applicable only if regex_mode is False.
  optional int32 max_edit_distance = 4;

  // Indicates whether the detection result should only contain the specified
  // words.
  bool specified_words_only = 5;
}

// Detection type for custom icon detection.
message CustomIconDetectionRequest {
  // Required. The icon to detect, in PNG format.
  bytes icon_png = 1 [(google.api.field_behavior) = REQUIRED];

  // Limit on the number of matches. Set match_count to -1 to not limit the
  // number of matches.
  int32 match_count = 2;

  // Confidence threshold in the range [0.0, 1.0] below which the matches will
  // be considered as non-existent.
  double min_confidence_threshold = 3;
}

// Response message for UI detection.
message UiDetectionResponse {
  // Locations of matching UI elements.
  repeated BoundingBox bounding_boxes = 1;

  // The transformed detection image PNG, if requested and transformations were
  // applied.
  bytes transformed_image_png = 2;

  // The amount the original image was scaled by to make the transformed image.
  // 1.0 if the detection result is not based on a resized image.
  float resizing_scale_factor = 3;
}

// The location of a UI element.
// A bounding box is represented by its top-left point [left, top]
// and its bottom-right point [right, bottom].
message BoundingBox {
  // The text found in the bounding box.
  string text = 1;

  // The y-coordinate of the top-left point.
  int32 top = 2;

  // The x-coordinate of the top-left point.
  int32 left = 3;

  // The y-coordinate of the bottom-right point.
  int32 bottom = 4;

  // The x-coordinate of the bottom-right point.
  int32 right = 5;
}