1// Copyright 2020 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
// Schema dialect used by every definition in this file.
syntax = "proto3";

// Namespace shared by all messages declared below.
package google.cloud.automl.v1beta1;

// Schemas whose types are referenced by the messages in this file.
// NOTE(review): no type from temporal.proto appears to be referenced in the
// visible part of this file - confirm before considering its removal.
import "google/cloud/automl/v1beta1/geometry.proto";
import "google/cloud/automl/v1beta1/io.proto";
import "google/cloud/automl/v1beta1/temporal.proto";
import "google/cloud/automl/v1beta1/text_segment.proto";
import "google/protobuf/struct.proto";

// Per-language code generation settings.
option go_package = "cloud.google.com/go/automl/apiv1beta1/automlpb;automlpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1beta1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1beta1";
option ruby_package = "Google::Cloud::AutoML::V1beta1";
30
// A representation of an image.
// Only images up to 30MB in size are supported.
message Image {
  // NOTE(review): this message declares no `content_uri` field, so the
  // documentation below deliberately does not direct readers to one;
  // `thumbnail_uri` is the only URI exposed here. Confirm with the API owners
  // whether a URI for reading back the uploaded image content is intended.

  // Input only. The data representing the image.
  // For Predict calls
  // [image_bytes][google.cloud.automl.v1beta1.Image.image_bytes] must be set,
  // as other options are not currently supported by prediction API.
  oneof data {
    // Image content represented as a stream of bytes.
    // Note: As with all `bytes` fields, protobuffers use a pure binary
    // representation, whereas JSON representations use base64.
    bytes image_bytes = 1;

    // An input config specifying the content of the image.
    InputConfig input_config = 6;
  }

  // Output only. HTTP URI to the thumbnail image.
  string thumbnail_uri = 4;
}
51
// Represents a snippet of text.
message TextSnippet {
  // Required. The text of the snippet, given as a string of at most 250000
  // characters.
  string content = 1;

  // Optional. Format of
  // [content][google.cloud.automl.v1beta1.TextSnippet.content]. The only two
  // values currently allowed are "text/html" and "text/plain". When left
  // blank, the format is determined automatically from the type of the
  // uploaded [content][google.cloud.automl.v1beta1.TextSnippet.content].
  string mime_type = 2;

  // Output only. HTTP URI from which the content can be downloaded.
  string content_uri = 4;
}
66
// Describes the dimensions of a document.
message DocumentDimensions {
  // The unit in which a document dimension is expressed.
  enum DocumentDimensionUnit {
    // Should not be used.
    DOCUMENT_DIMENSION_UNIT_UNSPECIFIED = 0;

    // The document dimension is given in inches.
    INCH = 1;

    // The document dimension is given in centimeters.
    CENTIMETER = 2;

    // The document dimension is given in points, where 72 points = 1 inch.
    POINT = 3;
  }

  // The unit that the dimension values are expressed in.
  DocumentDimensionUnit unit = 1;

  // The width of the document; interpreted together with `unit`.
  float width = 2;

  // The height of the document; interpreted together with `unit`.
  float height = 3;
}
93
// A structured text document e.g. a PDF.
message Document {
  // Describes the layout information of a
  // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
  // in the document.
  message Layout {
    // The type of TextSegment in the context of the original document.
    enum TextSegmentType {
      // Should not be used.
      TEXT_SEGMENT_TYPE_UNSPECIFIED = 0;

      // The text segment is a token, e.g. a word.
      TOKEN = 1;

      // The text segment is a paragraph.
      PARAGRAPH = 2;

      // The text segment is a form field.
      FORM_FIELD = 3;

      // The text segment is the name part of a form field. It will be treated
      // as a child of another FORM_FIELD TextSegment if its span is a subspan
      // of another TextSegment with type FORM_FIELD.
      FORM_FIELD_NAME = 4;

      // The text segment is the text content part of a form field. It will be
      // treated as a child of another FORM_FIELD TextSegment if its span is a
      // subspan of another TextSegment with type FORM_FIELD.
      FORM_FIELD_CONTENTS = 5;

      // The text segment is a whole table, including headers, and all rows.
      TABLE = 6;

      // The text segment is a table's headers. It will be treated as a child
      // of another TABLE TextSegment if its span is a subspan of another
      // TextSegment with type TABLE.
      TABLE_HEADER = 7;

      // The text segment is a row in a table. It will be treated as a child of
      // another TABLE TextSegment if its span is a subspan of another
      // TextSegment with type TABLE.
      TABLE_ROW = 8;

      // The text segment is a cell in a table. It will be treated as a child
      // of another TABLE_ROW TextSegment if its span is a subspan of another
      // TextSegment with type TABLE_ROW.
      TABLE_CELL = 9;
    }

    // Text Segment that represents a segment in
    // [document_text][google.cloud.automl.v1beta1.Document.document_text].
    TextSegment text_segment = 1;

    // Page number of the
    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
    // in the original document, starts from 1.
    int32 page_number = 2;

    // The position of the
    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
    // in the page. Contains exactly 4
    // [normalized_vertices][google.cloud.automl.v1beta1.BoundingPoly.normalized_vertices]
    // and they are connected by edges in the order provided, which will
    // represent a rectangle parallel to the frame. The
    // [NormalizedVertex-s][google.cloud.automl.v1beta1.NormalizedVertex] are
    // relative to the page.
    // Coordinates are based on top-left as point (0,0).
    BoundingPoly bounding_poly = 3;

    // The type of the
    // [text_segment][google.cloud.automl.v1beta1.Document.Layout.text_segment]
    // in document.
    TextSegmentType text_segment_type = 4;
  }

  // An input config specifying the content of the document.
  DocumentInputConfig input_config = 1;

  // The plain text version of this document.
  TextSnippet document_text = 2;

  // Describes the layout of the document.
  // Sorted by
  // [page_number][google.cloud.automl.v1beta1.Document.Layout.page_number].
  repeated Layout layout = 3;

  // The dimensions of the page in the document.
  DocumentDimensions document_dimensions = 4;

  // Number of pages in the document.
  int32 page_count = 5;
}
180
// Represents one row of a relational table.
message Row {
  // Resource IDs of the column specs that describe this row's columns.
  // When set, the list must contain every input feature
  // [column_spec_ids][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
  // of the Model this row is being passed to, although possibly in a
  // different order.
  // Note: when this field is set, the `values` field below must follow the
  // same order as this field.
  repeated string column_spec_ids = 2;

  // Required. The values of the row's cells. They are given in the same order
  // as the column_spec_ids or, when that field is not set, in the same order
  // as the input feature
  // [column_specs][google.cloud.automl.v1beta1.TablesModelMetadata.input_feature_column_specs]
  // of the Model this row is being passed to.
  repeated google.protobuf.Value values = 3;
}
201
// Carries example data that is used for training or prediction.
message ExamplePayload {
  // Required. Input only. The data of the example.
  oneof payload {
    // The example is an image.
    Image image = 1;

    // The example is a text snippet.
    TextSnippet text_snippet = 2;

    // The example is a document.
    Document document = 4;

    // The example is a row of a relational table.
    Row row = 3;
  }
}
219