// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.chromeos.uidetection.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";

option go_package = "google.golang.org/genproto/googleapis/chromeos/uidetection/v1;uidetection";
option java_multiple_files = true;
option java_outer_classname = "UiDetectionProto";
option java_package = "com.google.chromeos.uidetection.v1";

// Provides image-based UI detection service.
service UiDetectionService {
  option (google.api.default_host) = "chromeosuidetection.googleapis.com";

  // Runs the detection described by the request and returns the locations of
  // the matching UI elements.
  rpc ExecuteDetection(UiDetectionRequest) returns (UiDetectionResponse) {
    // NOTE(review): this is a GET binding with no body, so request fields are
    // carried as URL query parameters (including `image_png`) -- confirm this
    // is intended before changing; the binding is part of the published API.
    option (google.api.http) = {
      get: "/v1/executeDetection:execute"
    };
  }
}

// Request message for UI detection.
message UiDetectionRequest {
  // Required. The PNG-encoded image to run the detection on.
  bytes image_png = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The detection type, i.e. what to detect in the image.
  DetectionRequest request = 2 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether to fall back to resizing the image if no elements are
  // detected.
  optional bool resize_image = 3;

  // Deprecated as of 2023-03-29. Use test_metadata instead.
  string test_id = 4 [deprecated = true];

  // Optional. Metadata about the client for analytics.
  TestMetadata test_metadata = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates whether to always start by resizing the image.
  bool force_image_resizing = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Indicates whether to respond with the transformed image png.
  bool return_transformed_image = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Detection type specifies what to detect in the image.
message DetectionRequest {
  // Exactly one detection type can be set per request.
  oneof detection_request_type {
    // Detection type for word detection.
    WordDetectionRequest word_detection_request = 1;

    // Detection type for text block detection.
    TextBlockDetectionRequest text_block_detection_request = 2;

    // Detection type for custom icon detection.
    CustomIconDetectionRequest custom_icon_detection_request = 3;
  }
}

// Metadata about the client test and test device.
message TestMetadata {
  // Name of the calling test. For example, 'tast.uidetection.BasicDetections'.
  string test_id = 1;

  // Board name of the ChromeOS device under test. For example, 'volteer'.
  string board = 2;

  // Model name of the ChromeOS device under test. For example, 'volet'.
  string model = 3;

  // ChromeOS build of the device under test.
  // For example, 'volteer-release/R110-15275.0.0-75031-8794956681263330561'.
  string cros_build = 4;
}

// Detection type for word detection.
message WordDetectionRequest {
  // Required. The word to locate in the image.
  string word = 1 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether the query string is a regex or not.
  bool regex_mode = 2;

  // Indicates whether to disable approximate matching.
  bool disable_approx_match = 3;

  // Levenshtein distance threshold.
  // Applicable only if regex_mode is false.
  optional int32 max_edit_distance = 4;
}

// Detection type for text block detection.
message TextBlockDetectionRequest {
  // Required. The text block, consisting of a list of words, to locate in the
  // image.
  repeated string words = 1 [(google.api.field_behavior) = REQUIRED];

  // Indicates whether the query string is a regex or not.
  bool regex_mode = 2;

  // Indicates whether to disable approximate matching.
  bool disable_approx_match = 3;

  // Levenshtein distance threshold.
  // Applicable only if regex_mode is false.
  optional int32 max_edit_distance = 4;

  // Indicates whether the detection result should only contain the specified
  // words.
  bool specified_words_only = 5;
}

// Detection type for custom icon detection.
message CustomIconDetectionRequest {
  // Required. The icon to detect, in PNG format.
  bytes icon_png = 1 [(google.api.field_behavior) = REQUIRED];

  // Set match_count to -1 to not limit the number of matches.
  int32 match_count = 2;

  // Confidence threshold in the range [0.0, 1.0] below which the matches will
  // be considered as non-existent.
  double min_confidence_threshold = 3;
}

// Response message for UI detection.
message UiDetectionResponse {
  // Locations of matching UI elements.
  repeated BoundingBox bounding_boxes = 1;

  // The transformed detection image PNG, if requested and transformations were
  // applied.
  bytes transformed_image_png = 2;

  // The amount the original image was scaled by to make the transformed image.
  // 1.0 if the detection result is not based on a resized image.
  float resizing_scale_factor = 3;
}

// The location of a UI element.
// A bounding box is represented by its top-left point [left, top]
// and its bottom-right point [right, bottom].
message BoundingBox {
  // The text found in the bounding box.
  string text = 1;

  // The y-coordinate of the top-left point.
  int32 top = 2;

  // The x-coordinate of the top-left point.
  int32 left = 3;

  // The y-coordinate of the bottom-right point.
  int32 bottom = 4;

  // The x-coordinate of the bottom-right point.
  int32 right = 5;
}