// Copyright 2020 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.automl.v1beta1;

import "google/cloud/automl/v1beta1/geometry.proto";
import "google/cloud/automl/v1beta1/io.proto";
import "google/cloud/automl/v1beta1/temporal.proto";
import "google/cloud/automl/v1beta1/text_segment.proto";
import "google/protobuf/struct.proto";

option go_package = "cloud.google.com/go/automl/apiv1beta1/automlpb;automlpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
option ruby_package = "Google::Cloud::AutoML::V1beta1";

// A representation of an image.
// Only images up to 30MB in size are supported.
message Image {
  // NOTE(review): the comment on `data` below links to `Image.content_uri`,
  // but no field of that name is declared in this message. Confirm which
  // field the sentence is meant to reference.

  // Input only. The data representing the image.
  // For Predict calls
  // [image_bytes][google.cloud.automl.v1beta1.Image.image_bytes]
  // must be set, as other options are not currently supported by prediction
  // API. You can read the contents of an uploaded image by using the
  // [content_uri][google.cloud.automl.v1beta1.Image.content_uri] field.
  oneof data {
    // Image content represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes image_bytes = 1;

    // An input config specifying the content of the image.
    InputConfig input_config = 6;
  }

  // Output only. HTTP URI to the thumbnail image.
  string thumbnail_uri = 4;
}

// A representation of a text snippet.
message TextSnippet {
  // Required. The content of the text snippet as a string. Up to 250000
  // characters long.
  string content = 1;

  // Optional. The format of
  // [content][google.cloud.automl.v1beta1.TextSnippet.content].
  // Currently the only two allowed values are "text/html" and "text/plain".
  // If left blank, the format is automatically determined from the type of
  // the uploaded
  // [content][google.cloud.automl.v1beta1.TextSnippet.content].
  string mime_type = 2;

  // Output only. HTTP URI where you can download the content.
  string content_uri = 4;
}

// Message that describes dimension of a document.
message DocumentDimensions {
  // Unit of the document dimension.
  enum DocumentDimensionUnit {
    // Should not be used.
    DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0;

    // Document dimension is measured in inches.
    INCH = 1;

    // Document dimension is measured in centimeters.
    CENTIMETER = 2;

    // Document dimension is measured in points. 72 points = 1 inch.
    POINT = 3;
  }

  // Unit of the dimension.
  DocumentDimensionUnit unit = 1;

  // Width value of the document, works together with the unit.
  float width = 2;

  // Height value of the document, works together with the unit.
  float height = 3;
}

// A structured text document e.g. a PDF.
message Document {
  // Describes the layout information of a
  // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
  // in the document.
  message Layout {
    // The type of TextSegment in the context of the original document.
    enum TextSegmentType {
      // Should not be used.
      TEXT_SEGMENT_TYPE_UNSPECIFIED = 0;

      // The text segment is a token. e.g. word.
      TOKEN = 1;

      // The text segment is a paragraph.
      PARAGRAPH = 2;

      // The text segment is a form field.
      FORM_FIELD = 3;

      // The text segment is the name part of a form field. It will be treated
      // as child of another FORM_FIELD TextSegment if its span is subspan of
      // another TextSegment with type FORM_FIELD.
      FORM_FIELD_NAME = 4;

      // The text segment is the text content part of a form field. It will be
      // treated as child of another FORM_FIELD TextSegment if its span is
      // subspan of another TextSegment with type FORM_FIELD.
      FORM_FIELD_CONTENTS = 5;

      // The text segment is a whole table, including headers, and all rows.
      TABLE = 6;

      // The text segment is a table's headers. It will be treated as child of
      // another TABLE TextSegment if its span is subspan of another TextSegment
      // with type TABLE.
      TABLE_HEADER = 7;

      // The text segment is a row in table. It will be treated as child of
      // another TABLE TextSegment if its span is subspan of another TextSegment
      // with type TABLE.
      TABLE_ROW = 8;

      // The text segment is a cell in table. It will be treated as child of
      // another TABLE_ROW TextSegment if its span is subspan of another
      // TextSegment with type TABLE_ROW.
      TABLE_CELL = 9;
    }

    // Text Segment that represents a segment in
    // [document_text][google.cloud.automl.v1beta1.Document.document_text].
    TextSegment text_segment = 1;

    // Page number of the
    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
    // in the original document, starts from 1.
    int32 page_number = 2;

    // The position of the
    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
    // in the page. Contains exactly 4
    // [normalized_vertices][google.cloud.automl.v1beta1.BoundingPoly.normalized_vertices]
    // and they are connected by edges in the order provided, which will
    // represent a rectangle parallel to the frame. The
    // [NormalizedVertex-s][google.cloud.automl.v1beta1.NormalizedVertex] are
    // relative to the page.
    // Coordinates are based on top-left as point (0,0).
    BoundingPoly bounding_poly = 3;

    // The type of the
    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
    // in document.
    TextSegmentType text_segment_type = 4;
  }

  // An input config specifying the content of the document.
  DocumentInputConfig input_config = 1;

  // The plain text version of this document.
  TextSnippet document_text = 2;

  // Describes the layout of the document.
  // Sorted by
  // [page_number][google.cloud.automl.v1beta1.Document.Layout.page_number].
  repeated Layout layout = 3;

  // The dimensions of the page in the document.
  DocumentDimensions document_dimensions = 4;

  // Number of pages in the document.
  int32 page_count = 5;
}

// A representation of a row in a relational table.
message Row {
  // The resource IDs of the column specs describing the columns of the row.
  // If set must contain, but possibly in a different order, all input
  // feature
  // [column_spec_ids][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
  // of the Model this row is being passed to.
  // Note: The below `values` field must match order of this field, if this
  // field is set.
  repeated string column_spec_ids = 2;

  // Required. The values of the row cells, given in the same order as the
  // column_spec_ids, or, if not set, then in the same order as input
  // feature
  // [column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
  // of the Model this row is being passed to.
  repeated google.protobuf.Value values = 3;
}

// Example data used for training or prediction.
message ExamplePayload {
  // Required. Input only. The example data.
  oneof payload {
    // Example image.
    Image image = 1;

    // Example text.
    TextSnippet text_snippet = 2;

    // Example document.
    Document document = 4;

    // Example relational table row.
    Row row = 3;
  }
}