// Copyright 2021 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.automl.v1;

import "google/api/field_behavior.proto";

option csharp_namespace = "Google.Cloud.AutoML.V1";
option go_package = "cloud.google.com/go/automl/apiv1/automlpb;automlpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.automl.v1";
option php_namespace = "Google\\Cloud\\AutoMl\\V1";
option ruby_package = "Google::Cloud::AutoML::V1";

// Input configuration for [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData] action.
//
// The format of input depends on the dataset_metadata of the Dataset into
// which the import is happening. As input source the
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source]
// is expected, unless specified otherwise. Additionally, any input .CSV file
// by itself must be 100MB or smaller, unless specified otherwise.
// If an "example" file (that is, image, video etc.) with identical content
// (even if it has a different `GCS_FILE_PATH`) is mentioned multiple times,
// then its labels, bounding boxes etc. are appended. The same file should
// always be provided with the same `ML_USE` and `GCS_FILE_PATH`; if it is
// not, then these values are nondeterministically selected from the given
// ones.
//
// The formats are represented in EBNF with commas being literal and with
// non-terminal symbols defined near the end of this comment. The formats are:
//
// <h4>AutoML Vision</h4>
//
//
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/vision/automl/docs/prepare) for more
// information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH,LABEL,LABEL,...
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
// to.
//     This value can be one of the following:
//     * `TRAIN` - Rows in this file are used to train the model.
//     * `TEST` - Rows in this file are used to test the model during training.
//     * `UNASSIGNED` - Rows in this file are not categorized. They are
//        automatically divided into train and test data: 80% for training and
//        20% for testing.
//
// *   `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to
//      30MB in size. Supported extensions: .JPEG, .GIF, .PNG, .WEBP, .BMP,
//      .TIFF, .ICO.
//
// *   `LABEL` - A label that identifies the object in the image.
//
// For the `MULTICLASS` classification type, at most one `LABEL` is allowed
// per image. If an image has not yet been labeled, then it should be
// mentioned just once with no `LABEL`.
//
// Some sample rows:
//
//     TRAIN,gs://folder/image1.jpg,daisy
//     TEST,gs://folder/image2.jpg,dandelion,tulip,rose
//     UNASSIGNED,gs://folder/image3.jpg,daisy
//     UNASSIGNED,gs://folder/image4.jpg
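//
// If the same image appears on multiple rows, its labels are appended, as
// described above. For example, given also the (hypothetical) row:
//
//     TRAIN,gs://folder/image1.jpg,tulip
//
// image1.jpg would be imported once, labeled with both `daisy` and `tulip`.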
//
//
// </section><section><h5>Object Detection</h5>
// See [Preparing your training
// data](https://cloud.google.com/vision/automl/object-detection/docs/prepare)
// for more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,)
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
// to.
//     This value can be one of the following:
//     * `TRAIN` - Rows in this file are used to train the model.
//     * `TEST` - Rows in this file are used to test the model during training.
//     * `UNASSIGNED` - Rows in this file are not categorized. They are
//        automatically divided into train and test data: 80% for training and
//        20% for testing.
//
// *  `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to
//     30MB in size. Supported extensions: .JPEG, .GIF, .PNG. Each image
//     is assumed to be exhaustively labeled.
//
// *  `LABEL` - A label that identifies the object in the image specified by the
//    `BOUNDING_BOX`.
//
// *  `BOUNDING_BOX` - The vertices of an object in the example image.
//    The minimum allowed `BOUNDING_BOX` edge length is 0.01, and no more than
//    500 `BOUNDING_BOX` instances per image are allowed (one `BOUNDING_BOX`
//    per line). If an image has none of the looked-for objects then it should
//    be mentioned just once with no LABEL and ",,,,,,," in place of the
//    `BOUNDING_BOX`.
//
// **Four sample rows:**
//
//     TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
//     TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
//     UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
//     TEST,gs://folder/im3.png,,,,,,,,,
//   </section>
// </div>
//
//
// <h4>AutoML Video Intelligence</h4>
//
//
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/video-intelligence/automl/docs/prepare) for
// more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH
//
// For `ML_USE`, do not use `VALIDATE`.
//
// `GCS_FILE_PATH` is the path to another .csv file that describes training
// examples for a given `ML_USE`, using the following row format:
//
//     GCS_FILE_PATH,(LABEL,TIME_SEGMENT_START,TIME_SEGMENT_END | ,,)
//
// Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up
// to 3h duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time. Any
// segment of a video which has one or more labels on it is considered a
// hard negative for all other labels. Any segment with no labels on
// it is considered to be unknown. If a whole video is unknown, then
// it should be mentioned just once with ",," in place of `LABEL,
// TIME_SEGMENT_START,TIME_SEGMENT_END`.
//
// Sample top level CSV file:
//
//     TRAIN,gs://folder/train_videos.csv
//     TEST,gs://folder/test_videos.csv
//     UNASSIGNED,gs://folder/other_videos.csv
//
// Sample rows of a CSV file for a particular ML_USE:
//
//     gs://folder/video1.avi,car,120,180.000021
//     gs://folder/video1.avi,bike,150,180.000021
//     gs://folder/vid2.avi,car,0,60.5
//     gs://folder/vid3.avi,,,
//
//
//
// </section><section><h5>Object Tracking</h5>
//
// See [Preparing your training
// data](/video-intelligence/automl/object-tracking/docs/prepare) for more
// information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH
//
// For `ML_USE`, do not use `VALIDATE`.
//
// `GCS_FILE_PATH` is the path to another .csv file that describes training
// examples for a given `ML_USE`, using the following row format:
//
//     GCS_FILE_PATH,LABEL,[INSTANCE_ID],TIMESTAMP,BOUNDING_BOX
//
// or
//
//     GCS_FILE_PATH,,,,,,,,,,
//
// Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up
// to 3h duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
// Providing `INSTANCE_ID`s can help to obtain a better model. When
// a specific labeled entity leaves the video frame and shows up
// afterwards, it is not required, albeit preferable, that the same
// `INSTANCE_ID` is given to it.
//
// `TIMESTAMP` must be within the length of the video; the
// `BOUNDING_BOX` is assumed to be drawn on the video frame closest
// to the `TIMESTAMP`. Any frame mentioned by a `TIMESTAMP` is expected
// to be exhaustively labeled, and no more than 500 `BOUNDING_BOX`es per
// frame are allowed. If a whole video is unknown, then it should be
// mentioned just once with ",,,,,,,,,," in place of `LABEL,
// [INSTANCE_ID],TIMESTAMP,BOUNDING_BOX`.
//
// Sample top level CSV file:
//
//      TRAIN,gs://folder/train_videos.csv
//      TEST,gs://folder/test_videos.csv
//      UNASSIGNED,gs://folder/other_videos.csv
//
// Seven sample rows of a CSV file for a particular ML_USE:
//
//      gs://folder/video1.avi,car,1,12.10,0.8,0.8,0.9,0.8,0.9,0.9,0.8,0.9
//      gs://folder/video1.avi,car,1,12.90,0.4,0.8,0.5,0.8,0.5,0.9,0.4,0.9
//      gs://folder/video1.avi,car,2,12.10,.4,.2,.5,.2,.5,.3,.4,.3
//      gs://folder/video1.avi,car,2,12.90,.8,.2,,,.9,.3,,
//      gs://folder/video1.avi,bike,,12.50,.45,.45,,,.55,.55,,
//      gs://folder/video2.avi,car,1,0,.1,.9,,,.9,.1,,
//      gs://folder/video2.avi,,,,,,,,,,,
//   </section>
// </div>
//
//
// <h4>AutoML Natural Language</h4>
//
//
// <div class="ds-selector-tabs"><section><h5>Entity Extraction</h5>
//
// See [Preparing your training
// data](/natural-language/automl/entity-analysis/docs/prepare) for more
// information.
//
// One or more CSV file(s) with each line in the following format:
//
//     ML_USE,GCS_FILE_PATH
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
// to.
//     This value can be one of the following:
//     * `TRAIN` - Rows in this file are used to train the model.
//     * `TEST` - Rows in this file are used to test the model during training.
//     * `UNASSIGNED` - Rows in this file are not categorized. They are
//        automatically divided into train and test data: 80% for training and
//        20% for testing.
//
// *   `GCS_FILE_PATH` - Identifies a JSON Lines (.JSONL) file stored in
//      Google Cloud Storage that contains in-line text as documents
//      for model training.
//
// After the training data set has been determined from the `TRAIN` and
// `UNASSIGNED` CSV files, the training data is divided into train and
// validation data sets: 70% for training and 30% for validation.
//
// For example:
//
//     TRAIN,gs://folder/file1.jsonl
//     VALIDATE,gs://folder/file2.jsonl
//     TEST,gs://folder/file3.jsonl
//
// **In-line JSONL files**
//
// In-line .JSONL files contain, per line, a JSON document that wraps a
// [`text_snippet`][google.cloud.automl.v1.TextSnippet] field followed by
// one or more [`annotations`][google.cloud.automl.v1.AnnotationPayload]
// fields, which have `display_name` and `text_extraction` fields to describe
// the entity from the text snippet. Multiple JSON documents can be separated
// using line breaks (\n).
//
// The supplied text must be annotated exhaustively. For example, if you
// include the text "horse", but do not label it as "animal",
// then "horse" is assumed to not be an "animal".
//
// Any given text snippet content must have 30,000 characters or
// less, and also be UTF-8 NFC encoded. ASCII is accepted as it is
// UTF-8 NFC encoded.
//
// For example:
//
//     {
//       "text_snippet": {
//         "content": "dog car cat"
//       },
//       "annotations": [
//          {
//            "display_name": "animal",
//            "text_extraction": {
//              "text_segment": {"start_offset": 0, "end_offset": 2}
//            }
//          },
//          {
//            "display_name": "vehicle",
//            "text_extraction": {
//              "text_segment": {"start_offset": 4, "end_offset": 6}
//            }
//          },
//          {
//            "display_name": "animal",
//            "text_extraction": {
//              "text_segment": {"start_offset": 8, "end_offset": 10}
//            }
//          }
//       ]
//     }\n
//     {
//        "text_snippet": {
//          "content": "This dog is good."
//        },
//        "annotations": [
//           {
//             "display_name": "animal",
//             "text_extraction": {
//               "text_segment": {"start_offset": 5, "end_offset": 7}
//             }
//           }
//        ]
//     }
//
// **JSONL files that reference documents**
//
// .JSONL files contain, per line, a JSON document that wraps an
// `input_config` that contains the path to a source document.
// Multiple JSON documents can be separated using line breaks (\n).
//
// Supported document extensions: .PDF, .TIF, .TIFF
//
// For example:
//
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] }
//         }
//       }
//     }\n
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ] }
//         }
//       }
//     }
//
// **In-line JSONL files with document layout information**
//
// **Note:** You can only annotate documents using the UI. The format described
// below applies to annotated documents exported using the UI or `exportData`.
//
// In-line .JSONL files for documents contain, per line, a JSON document
// that wraps a `document` field that provides the textual content of the
// document and the layout information.
//
// For example:
//
//     {
//       "document": {
//         "document_text": {
//           "content": "dog car cat"
//         },
//         "layout": [
//           {
//             "text_segment": {
//               "start_offset": 0,
//               "end_offset": 11
//             },
//             "page_number": 1,
//             "bounding_poly": {
//               "normalized_vertices": [
//                 {"x": 0.1, "y": 0.1},
//                 {"x": 0.1, "y": 0.3},
//                 {"x": 0.3, "y": 0.3},
//                 {"x": 0.3, "y": 0.1}
//               ]
//             },
//             "text_segment_type": "TOKEN"
//           }
//         ],
//         "document_dimensions": {
//           "width": 8.27,
//           "height": 11.69,
//           "unit": "INCH"
//         },
//         "page_count": 3
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 0, "end_offset": 3}
//           }
//         },
//         {
//           "display_name": "vehicle",
//           "text_extraction": {
//             "text_segment": {"start_offset": 4, "end_offset": 7}
//           }
//         },
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 8, "end_offset": 11}
//           }
//         }
//       ]
//     }
//
//
//
//
// </section><section><h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/natural-language/automl/docs/prepare) for more
// information.
//
// One or more CSV file(s) with each line in the following format:
//
//     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),LABEL,LABEL,...
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
// to.
//     This value can be one of the following:
//     * `TRAIN` - Rows in this file are used to train the model.
//     * `TEST` - Rows in this file are used to test the model during training.
//     * `UNASSIGNED` - Rows in this file are not categorized. They are
//        automatically divided into train and test data: 80% for training and
//        20% for testing.
//
// *   `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
//     the column content is a valid Google Cloud Storage file path, that is,
//     prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
//     the content is enclosed in double quotes (""), it is treated as a
//     `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
//     file with a supported extension and UTF-8 encoding, for example,
//     "gs://folder/content.txt". AutoML imports the file content
//     as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content
//     excluding quotes. In both cases, the content must be 10MB or
//     less in size. For zip files, each file inside the zip must be
//     10MB or less in size.
//
//     For the `MULTICLASS` classification type, at most one `LABEL` is allowed.
//
//     The `ML_USE` and `LABEL` columns are optional.
//     Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP
//
// A maximum of 100 unique labels are allowed per CSV row.
//
// Sample rows:
//
//     TRAIN,"They have bad food and very rude",RudeService,BadFood
//     gs://folder/content.txt,SlowService
//     TEST,gs://folder/document.pdf
//     VALIDATE,gs://folder/text_files.zip,BadFood
//
//
//
// </section><section><h5>Sentiment Analysis</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/natural-language/automl/docs/prepare) for more
// information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT
//
// *   `ML_USE` - Identifies the data set that the current row (file) applies
// to.
//     This value can be one of the following:
//     * `TRAIN` - Rows in this file are used to train the model.
//     * `TEST` - Rows in this file are used to test the model during training.
//     * `UNASSIGNED` - Rows in this file are not categorized. They are
//        automatically divided into train and test data: 80% for training and
//        20% for testing.
//
// *   `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If
//     the column content is a valid Google Cloud Storage file path, that is,
//     prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if
//     the content is enclosed in double quotes (""), it is treated as a
//     `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a
//     file with a supported extension and UTF-8 encoding, for example,
//     "gs://folder/content.txt". AutoML imports the file content
//     as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content
//     excluding quotes. In both cases, the content must be 128kB or
//     less in size. For zip files, each file inside the zip must be
//     128kB or less in size.
//
//     The `ML_USE` and `SENTIMENT` columns are optional.
//     Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP
//
// *  `SENTIMENT` - An integer between 0 and
//     Dataset.text_sentiment_dataset_metadata.sentiment_max
//     (inclusive). Describes the ordinal of the sentiment - a higher
//     value means a more positive sentiment. All the values are
//     completely relative, i.e. neither does 0 need to mean a negative or
//     neutral sentiment nor does sentiment_max need to mean a positive one -
//     it is just required that 0 is the least positive sentiment
//     in the data, and sentiment_max is the most positive one.
//     The SENTIMENT shouldn't be confused with "score" or "magnitude"
//     from the previous Natural Language Sentiment Analysis API.
//     All SENTIMENT values between 0 and sentiment_max must be
//     represented in the imported data. On prediction the same 0 to
//     sentiment_max range will be used. The difference between
//     neighboring sentiment values need not be uniform, e.g. 1 and
//     2 may be similar whereas the difference between 2 and 3 may be
//     large.
//
// Sample rows:
//
//     TRAIN,"@freewrytin this is way too good for your product",2
//     gs://folder/content.txt,3
//     TEST,gs://folder/document.pdf
//     VALIDATE,gs://folder/text_files.zip,2
//   </section>
// </div>
//
//
//
// <h4>AutoML Tables</h4><div class="ui-datasection-main"><section
// class="selected">
//
// See [Preparing your training
// data](https://cloud.google.com/automl-tables/docs/prepare) for more
// information.
//
// You can use either
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source] or
// [bigquery_source][google.cloud.automl.v1.InputConfig.bigquery_source].
// All input is concatenated into a single
// [primary_table_spec_id][google.cloud.automl.v1.TablesDatasetMetadata.primary_table_spec_id].
//
// **For gcs_source:**
//
// CSV file(s), where the first row of the first file is the header,
// containing unique column names. If the first row of a subsequent
// file is the same as the header, then it is also treated as a
// header. All other rows contain values for the corresponding
// columns.
//
// Each .CSV file by itself must be 10GB or smaller, and their total
// size must be 100GB or smaller.
//
// First three sample rows of a CSV file:
// <pre>
// "Id","First Name","Last Name","Dob","Addresses"
// "1","John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
// "2","Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
// </pre>
// **For bigquery_source:**
//
// A URI of a BigQuery table. The user data size of the BigQuery
// table must be 100GB or smaller.
//
// An imported table must have between 2 and 1,000 columns, inclusive,
// and between 1,000 and 100,000,000 rows, inclusive. At most 5
// import data operations can run in parallel.
//
//   </section>
// </div>
//
//
// **Input field definitions:**
//
// `ML_USE`
// : ("TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED")
//   Describes how the given example (file) should be used for model
//   training. "UNASSIGNED" can be used when the user has no preference.
//
// `GCS_FILE_PATH`
// : The path to a file on Google Cloud Storage. For example,
//   "gs://folder/image1.png".
//
// `LABEL`
// : A display name of an object on an image, video etc., e.g. "dog".
//   Must be up to 32 characters long and can consist only of ASCII
//   Latin letters A-Z and a-z, underscores (_), and ASCII digits 0-9.
//   For each label an AnnotationSpec is created whose display_name
//   becomes the label; AnnotationSpecs are given back in predictions.
//
// `INSTANCE_ID`
// : A positive integer that identifies a specific instance of a
//   labeled entity on an example. Used e.g. to track two cars on
//   a video while being able to tell apart which one is which.
//
// `BOUNDING_BOX`
// : (`VERTEX,VERTEX,VERTEX,VERTEX` | `VERTEX,,,VERTEX,,`)
//   A rectangle parallel to the frame of the example (image,
//   video). If 4 vertices are given they are connected by edges
//   in the order provided; if 2 are given they are recognized
//   as diagonally opposite vertices of the rectangle (see the
//   worked example after these definitions).
//
// `VERTEX`
// : (`COORDINATE,COORDINATE`)
//   First coordinate is horizontal (x), the second is vertical (y).
//
// `COORDINATE`
// : A float in the 0 to 1 range, relative to the total length of the
//   image or video in the given dimension. For fractions the
//   leading non-decimal 0 can be omitted (i.e. 0.3 = .3).
//   Point 0,0 is in the top left.
//
// `TIME_SEGMENT_START`
// : (`TIME_OFFSET`)
//   Expresses a beginning, inclusive, of a time segment
//   within an example that has a time dimension
//   (e.g. video).
//
// `TIME_SEGMENT_END`
// : (`TIME_OFFSET`)
//   Expresses an end, exclusive, of a time segment within
//   an example that has a time dimension (e.g. video).
//
// `TIME_OFFSET`
// : A number of seconds as measured from the start of an
//   example (e.g. video). Fractions are allowed, up to a
//   microsecond precision. "inf" is allowed, and it means the end
//   of the example.
//
// `TEXT_SNIPPET`
// : The content of a text snippet, UTF-8 encoded, enclosed within
//   double quotes ("").
//
// `DOCUMENT`
// : A field that provides the textual content of the document and the
//   layout information.
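//
// As a worked example of the 2-vertex `BOUNDING_BOX` form (referenced from
// the definition above), consider the Object Detection sample row
//
//     TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
//
// Its bounding box `0.1,0.1,,,0.3,0.3,,` gives the diagonally opposite
// corners (x=0.1, y=0.1) and (x=0.3, y=0.3); the empty `,,` pairs stand in
// for the two omitted vertices.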
//
//
//  **Errors:**
//
//  If any of the provided CSV files can't be parsed or if more than a certain
//  percent of CSV rows cannot be processed then the operation fails and
//  nothing is imported. Regardless of overall success or failure, the per-row
//  failures, up to a certain count cap, are listed in
//  Operation.metadata.partial_failures.
//
message InputConfig {
  // The source of the input.
  oneof source {
    // The Google Cloud Storage location for the input content.
    // For [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData], `gcs_source` points to a CSV file with
    // a structure described in [InputConfig][google.cloud.automl.v1.InputConfig].
    GcsSource gcs_source = 1;
  }

  // Additional domain-specific parameters describing the semantics of the
  // imported data, any string must be up to 25000
  // characters long.
  //
  // <h4>AutoML Tables</h4>
  //
  // `schema_inference_version`
  // : (integer) This value must be supplied.
  //   The version of the
  //   algorithm to use for the initial inference of the
  //   column data types of the imported table. Allowed values: "1".
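  //
  //   For example, a params map for an AutoML Tables import could be
  //   (shown here in JSON form):
  //
  //       { "schema_inference_version": "1" }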
  map<string, string> params = 2;
}
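
// A minimal client-side sketch of calling ImportData with this InputConfig
// (Python, assuming the `google-cloud-automl` client library; the project,
// location, and dataset IDs are placeholders):
//
//     from google.cloud import automl_v1
//
//     client = automl_v1.AutoMlClient()
//     dataset_name = client.dataset_path("PROJECT_ID", "us-central1", "DATASET_ID")
//     input_config = automl_v1.InputConfig(
//         gcs_source=automl_v1.GcsSource(input_uris=["gs://folder/train.csv"])
//     )
//     # ImportData is a long-running operation; block until it completes.
//     operation = client.import_data(name=dataset_name, input_config=input_config)
//     operation.result()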

// Input configuration for BatchPredict Action.
//
// The format of input depends on the ML problem of the model used for
// prediction. As input source the
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source]
// is expected, unless specified otherwise.
//
// The formats are represented in EBNF with commas being literal and with
// non-terminal symbols defined near the end of this comment. The formats
// are:
//
// <h4>AutoML Vision</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// The Google Cloud Storage location of an image of up to
// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
// This path is treated as the ID in the batch predict output.
//
// Sample rows:
//
//     gs://folder/image1.jpeg
//     gs://folder/image2.gif
//     gs://folder/image3.png
//
// </section><section><h5>Object Detection</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// The Google Cloud Storage location of an image of up to
// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
// This path is treated as the ID in the batch predict output.
//
// Sample rows:
//
//     gs://folder/image1.jpeg
//     gs://folder/image2.gif
//     gs://folder/image3.png
//   </section>
// </div>
//
// <h4>AutoML Video Intelligence</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a video up to 50GB
// in size and up to 3h in duration.
// Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time.
//
// Sample rows:
//
//     gs://folder/video1.mp4,10,40
//     gs://folder/video1.mp4,20,60
//     gs://folder/vid2.mov,0,inf
//
// </section><section><h5>Object Tracking</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a video up to 50GB
// in size and up to 3h in duration.
// Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time.
//
// Sample rows:
//
//     gs://folder/video1.mp4,10,40
//     gs://folder/video1.mp4,20,60
//     gs://folder/vid2.mov,0,inf
//   </section>
// </div>
//
// <h4>AutoML Natural Language</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF
//
// Text files can be no larger than 10MB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//     gs://folder/text3.tif
//
// </section><section><h5>Sentiment Analysis</h5>
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF
//
// Text files can be no larger than 128kB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//     gs://folder/text3.tif
//
// </section><section><h5>Entity Extraction</h5>
//
// One or more JSONL (JSON Lines) files that either provide inline text or
// documents. You can only use one format, either inline text or documents,
// for a single call to [AutoMl.BatchPredict].
//
// Each inline JSONL file contains, per line, a proto that
// wraps a temporary user-assigned TextSnippet ID (string up to 2000
// characters long) called "id", a TextSnippet proto (in
// JSON representation) and zero or more TextFeature protos. Any given
// text snippet content must have 30,000 characters or less, and also
// be UTF-8 NFC encoded (ASCII already is). The IDs provided should be
// unique.
//
// Each document JSONL file contains, per line, a proto that wraps a Document
// proto with `input_config` set. Each document cannot exceed 2MB in size.
//
// Supported document extensions: .PDF, .TIF, .TIFF
//
// Each JSONL file must not exceed 100MB in size, and no more than 20
// JSONL files may be passed.
//
// Sample inline JSONL file (shown with artificial line
// breaks; actual line breaks are denoted by "\n"):
//
//     {
//       "id": "my_first_id",
//       "text_snippet": { "content": "dog car cat"},
//       "text_features": [
//         {
//           "text_segment": {"start_offset": 4, "end_offset": 6},
//           "structural_type": "PARAGRAPH",
//           "bounding_poly": {
//             "normalized_vertices": [
//               {"x": 0.1, "y": 0.1},
//               {"x": 0.1, "y": 0.3},
//               {"x": 0.3, "y": 0.3},
//               {"x": 0.3, "y": 0.1}
//             ]
//           }
//         }
//       ]
//     }\n
//     {
//       "id": "2",
//       "text_snippet": {
//         "content": "Extended sample content",
//         "mime_type": "text/plain"
//       }
//     }
//
// Sample document JSONL file (shown with artificial line
// breaks; actual line breaks are denoted by "\n"):
//
//      {
//        "document": {
//          "input_config": {
//            "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] }
//          }
//        }
//      }\n
//      {
//        "document": {
//          "input_config": {
//            "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ] }
//          }
//        }
//      }
//   </section>
// </div>
//
// <h4>AutoML Tables</h4><div class="ui-datasection-main"><section
// class="selected">
//
// See [Preparing your training
// data](https://cloud.google.com/automl-tables/docs/predict-batch) for more
// information.
//
// You can use either
// [gcs_source][google.cloud.automl.v1.BatchPredictInputConfig.gcs_source]
// or
// [bigquery_source][google.cloud.automl.v1.BatchPredictInputConfig.bigquery_source].
//
// **For gcs_source:**
//
// CSV file(s), each by itself 10GB or smaller and total size must be
// 100GB or smaller, where the first file must have a header containing
// column names. If the first row of a subsequent file is the same as
// the header, then it is also treated as a header. All other rows
// contain values for the corresponding columns.
//
// The column names must contain the model's
// [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs]
// [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name]
// (order doesn't matter). The columns corresponding to the model's
// input feature column specs must contain values compatible with the
// column spec's data types. Prediction on all the rows, i.e. the CSV
// lines, will be attempted.
//
//
// Sample rows from a CSV file:
// <pre>
// "First Name","Last Name","Dob","Addresses"
// "John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
// "Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
// </pre>
// **For bigquery_source:**
//
// The URI of a BigQuery table. The user data size of the BigQuery
// table must be 100GB or smaller.
//
// The column names must contain the model's
// [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs]
// [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name]
// (order doesn't matter). The columns corresponding to the model's
// input feature column specs must contain values compatible with the
// column spec's data types. Prediction on all the rows of the table
// will be attempted.
//   </section>
// </div>
//
// **Input field definitions:**
//
// `GCS_FILE_PATH`
// : The path to a file on Google Cloud Storage. For example,
//   "gs://folder/video.avi".
//
// `TIME_SEGMENT_START`
// : (`TIME_OFFSET`)
//   Expresses a beginning, inclusive, of a time segment
//   within an example that has a time dimension
//   (e.g. video).
//
// `TIME_SEGMENT_END`
// : (`TIME_OFFSET`)
//   Expresses an end, exclusive, of a time segment within
//   an example that has a time dimension (e.g. video).
//
// `TIME_OFFSET`
// : A number of seconds as measured from the start of an
//   example (e.g. video). Fractions are allowed, up to a
//   microsecond precision. "inf" is allowed, and it means the end
//   of the example.
//
//  **Errors:**
//
//  If any of the provided CSV files can't be parsed or if more than a certain
//  percent of CSV rows cannot be processed then the operation fails and
//  prediction does not happen. Regardless of overall success or failure, the
//  per-row failures, up to a certain count cap, will be listed in
//  Operation.metadata.partial_failures.
message BatchPredictInputConfig {
  // The source of the input.
  oneof source {
    // Required. The Google Cloud Storage location for the input content.
    GcsSource gcs_source = 1 [(google.api.field_behavior) = REQUIRED];
  }
}

// Input configuration of a [Document][google.cloud.automl.v1.Document].
message DocumentInputConfig {
  // The Google Cloud Storage location of the document file. Only a single path
  // should be given.
  //
  // Max supported size: 512MB.
  //
  // Supported extensions: .PDF.
  GcsSource gcs_source = 1;
}
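
// A minimal client-side sketch of wrapping a PDF in a Document for online
// prediction (Python, assuming the `google-cloud-automl` client library;
// the project, location, and model IDs are placeholders):
//
//     from google.cloud import automl_v1
//
//     client = automl_v1.PredictionServiceClient()
//     model_name = client.model_path("PROJECT_ID", "us-central1", "MODEL_ID")
//     document = automl_v1.Document(
//         input_config=automl_v1.DocumentInputConfig(
//             gcs_source=automl_v1.GcsSource(input_uris=["gs://folder/document1.pdf"])
//         )
//     )
//     payload = automl_v1.ExamplePayload(document=document)
//     response = client.predict(name=model_name, payload=payload)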

// *  For Translation:
//         CSV file `translation.csv`, with each line in format:
//         ML_USE,GCS_FILE_PATH
//         GCS_FILE_PATH leads to a .TSV file which describes examples that
//         have the given ML_USE, using the following row format per line:
//         TEXT_SNIPPET (in source language) \t TEXT_SNIPPET (in target
//         language)
//
//   *  For Tables:
//         Output depends on whether the dataset was imported from Google Cloud
//         Storage or BigQuery.
//         Google Cloud Storage case:
//           [gcs_destination][google.cloud.automl.v1p1beta.OutputConfig.gcs_destination]
//           must be set. Exported are CSV file(s) `tables_1.csv`,
//           `tables_2.csv`,...,`tables_N.csv` with each having as header line
//           the table's column names, and all other lines containing values
//           for the header columns.
//         BigQuery case:
//           [bigquery_destination][google.cloud.automl.v1p1beta.OutputConfig.bigquery_destination]
//           pointing to a BigQuery project must be set. In the given project a
//           new dataset will be created with name
//           `export_data_<automl-dataset-display-name>_<timestamp-of-export-call>`
//           where <automl-dataset-display-name> will be made
//           BigQuery-dataset-name compatible (e.g. most special characters will
//           become underscores), and timestamp will be in
//           YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In that
//           dataset a new table called `primary_table` will be created, and
//           filled with precisely the same data as that obtained on import.
message OutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location where the output is to be written.
    // For Image Object Detection, Text Extraction, Video Classification and
    // Tables, in the given directory a new directory will be created with name:
    // export_data-<dataset-display-name>-<timestamp-of-export-call> where
    // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export
    // output will be written into that directory.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
  }
}
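
// A minimal client-side sketch of calling ExportData with this OutputConfig
// (Python, assuming the `google-cloud-automl` client library; the project,
// location, and dataset IDs are placeholders):
//
//     from google.cloud import automl_v1
//
//     client = automl_v1.AutoMlClient()
//     dataset_name = client.dataset_path("PROJECT_ID", "us-central1", "DATASET_ID")
//     output_config = automl_v1.OutputConfig(
//         gcs_destination=automl_v1.GcsDestination(output_uri_prefix="gs://folder/export/")
//     )
//     # ExportData is a long-running operation; block until it completes.
//     operation = client.export_data(name=dataset_name, output_config=output_config)
//     operation.result()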

// Output configuration for BatchPredict Action.
//
// As destination the
// [gcs_destination][google.cloud.automl.v1.BatchPredictOutputConfig.gcs_destination]
// must be set unless specified otherwise for a domain. If gcs_destination is
// set then in the given directory a new directory is created. Its name
// will be
// "prediction-<model-display-name>-<timestamp-of-prediction-call>",
// where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. Its
// contents depend on the ML problem the predictions are made for.
//
//  *  For Image Classification:
//         In the created directory files `image_classification_1.jsonl`,
//         `image_classification_2.jsonl`,...,`image_classification_N.jsonl`
//         will be created, where N may be 1, and depends on the
//         total number of the successfully predicted images and annotations.
//         A single image will be listed only once with all its annotations,
//         and its annotations will never be split across files.
//         Each .JSONL file will contain, per line, a JSON representation of a
//         proto that wraps the image's "ID" : "<id_value>" followed by a list
//         of zero or more AnnotationPayload protos (called annotations), which
//         have classification detail populated.
//         If prediction for any image failed (partially or completely), then
//         additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
//         files will be created (N depends on total number of failed
//         predictions). These files will have a JSON representation of a proto
//         that wraps the same "ID" : "<id_value>" but here followed by
//         exactly one
//         [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//         containing only `code` and `message` fields.
//
//  *  For Image Object Detection:
//         In the created directory files `image_object_detection_1.jsonl`,
//         `image_object_detection_2.jsonl`,...,`image_object_detection_N.jsonl`
//         will be created, where N may be 1, and depends on the
//         total number of the successfully predicted images and annotations.
//         Each .JSONL file will contain, per line, a JSON representation of a
//         proto that wraps the image's "ID" : "<id_value>" followed by a list
//         of zero or more AnnotationPayload protos (called annotations), which
//         have image_object_detection detail populated. A single image will
//         be listed only once with all its annotations, and its annotations
//         will never be split across files.
//         If prediction for any image failed (partially or completely), then
//         additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl`
//         files will be created (N depends on total number of failed
//         predictions). These files will have a JSON representation of a proto
//         that wraps the same "ID" : "<id_value>" but here followed by
//         exactly one
//         [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//         containing only `code` and `message` fields.
//  *  For Video Classification:
//         In the created directory a video_classification.csv file, and a .JSON
//         file per each video classification requested in the input (i.e. each
//         line in the given CSV(s)), will be created.
//
//         The format of video_classification.csv is:
//         GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
//         where:
//         GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
//             the prediction input lines (i.e. video_classification.csv has
//             precisely the same number of lines as the prediction input had.)
//         JSON_FILE_NAME = Name of .JSON file in the output directory, which
//             contains prediction responses for the video time segment.
//         STATUS = "OK" if prediction completed successfully, or an error code
//             with message otherwise. If STATUS is not "OK" then the .JSON file
//             for that line may not exist or be empty.
//
//         Each .JSON file, assuming STATUS is "OK", will contain a list of
//         AnnotationPayload protos in JSON format, which are the predictions
//         for the video time segment the file is assigned to in the
//         video_classification.csv. All AnnotationPayload protos will have
//         video_classification field set, and will be sorted by
//         video_classification.type field (note that the returned types are
//         governed by `classifaction_types` parameter in
//         [PredictService.BatchPredictRequest.params][]).
//
//  *  For Video Object Tracking:
//         In the created directory a video_object_tracking.csv file will be
//         created, and multiple files video_object_tracking_1.json,
//         video_object_tracking_2.json,..., video_object_tracking_N.json,
//         where N is the number of requests in the input (i.e. the number of
//         lines in the given CSV(s)).
//
//         The format of video_object_tracking.csv is:
//         GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
//         where:
//         GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
//             the prediction input lines (i.e. video_object_tracking.csv has
//             precisely the same number of lines as the prediction input had.)
//         JSON_FILE_NAME = Name of .JSON file in the output directory, which
//             contains prediction responses for the video time segment.
//         STATUS = "OK" if prediction completed successfully, or an error
//             code with message otherwise. If STATUS is not "OK" then the .JSON
//             file for that line may not exist or be empty.
//
//         Each .JSON file, assuming STATUS is "OK", will contain a list of
//         AnnotationPayload protos in JSON format, which are the predictions
//         for each frame of the video time segment the file is assigned to in
//         video_object_tracking.csv. All AnnotationPayload protos will have
//         video_object_tracking field set.
//  *  For Text Classification:
//         In the created directory files `text_classification_1.jsonl`,
//         `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
//         will be created, where N may be 1, and depends on the
//         total number of inputs and annotations found.
//
//         Each .JSONL file will contain, per line, a JSON representation of a
//         proto that wraps the input text file (or document) in
//         the text snippet (or document) proto and a list of
//         zero or more AnnotationPayload protos (called annotations), which
//         have classification detail populated. A single text file (or
//         document) will be listed only once with all its annotations, and its
//         annotations will never be split across files.
//
//         If prediction for any input file (or document) failed (partially or
//         completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
//         `errors_N.jsonl` files will be created (N depends on total number of
//         failed predictions). These files will have a JSON representation of a
//         proto that wraps the input file followed by exactly one
//         [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//         containing only `code` and `message`.
//
//  *  For Text Sentiment:
//         In the created directory files `text_sentiment_1.jsonl`,
//         `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl`
//         will be created, where N may be 1, and depends on the
//         total number of inputs and annotations found.
//
//         Each .JSONL file will contain, per line, a JSON representation of a
//         proto that wraps the input text file (or document) in
//         the text snippet (or document) proto and a list of
//         zero or more AnnotationPayload protos (called annotations), which
//         have text_sentiment detail populated. A single text file (or
//         document) will be listed only once with all its annotations, and its
//         annotations will never be split across files.
//
//         If prediction for any input file (or document) failed (partially or
//         completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
//         `errors_N.jsonl` files will be created (N depends on total number of
//         failed predictions). These files will have a JSON representation of a
//         proto that wraps the input file followed by exactly one
//         [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//         containing only `code` and `message`.
//
//   *  For Text Extraction:
//         In the created directory files `text_extraction_1.jsonl`,
//         `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl`
//         will be created, where N may be 1, and depends on the
//         total number of inputs and annotations found.
//         The contents of these .JSONL file(s) depend on whether the input
//         used inline text, or documents.
//         If input was inline, then each .JSONL file will contain, per line,
//           a JSON representation of a proto that wraps the request's text
//           snippet "id" (if specified), followed by the input text snippet,
//           and a list of zero or more
//           AnnotationPayload protos (called annotations), which have
//           text_extraction detail populated. A single text snippet will be
//           listed only once with all its annotations, and its annotations will
//           never be split across files.
//         If input used documents, then each .JSONL file will contain, per
//           line, a JSON representation of a proto that wraps the document
//           proto given in the request, followed by its OCR-ed representation
//           in the form of a text snippet, finally followed by a list of zero
//           or more AnnotationPayload protos (called annotations), which have
//           text_extraction detail populated and refer, via their indices, to
//           the OCR-ed text snippet. A single document (and its text snippet)
//           will be listed only once with all its annotations, and its
//           annotations will never be split across files.
//         If prediction for any text snippet failed (partially or completely),
//         then additional `errors_1.jsonl`, `errors_2.jsonl`,...,
//         `errors_N.jsonl` files will be created (N depends on total number of
//         failed predictions). These files will have a JSON representation of a
//         proto that wraps either the "id" : "<id_value>" (in case of inline)
//         or the document proto (in case of document) but here followed by
//         exactly one
//         [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//         containing only `code` and `message`.
//
//  *  For Tables:
//         Output depends on whether
//         [gcs_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.gcs_destination]
//         or
//         [bigquery_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.bigquery_destination]
//         is set (either is allowed).
//         Google Cloud Storage case:
//           In the created directory files `tables_1.csv`, `tables_2.csv`,...,
//           `tables_N.csv` will be created, where N may be 1, and depends on
//           the total number of the successfully predicted rows.
//           For all CLASSIFICATION
//           [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]:
//             Each .csv file will contain a header, listing all columns'
//             [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
//             given on input followed by M target column names in the format of
//             "<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
//             [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>_<target
//             value>_score" where M is the number of distinct target values,
//             i.e. the number of distinct values in the target column of the
//             table used to train the model. Subsequent lines will contain the
//             respective values of successfully predicted rows, with the last,
//             i.e. the target, columns having the corresponding prediction
//             [scores][google.cloud.automl.v1p1beta.TablesAnnotation.score].
//           For REGRESSION and FORECASTING
//           [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]:
//             Each .csv file will contain a header, listing all columns'
//             [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
//             given on input followed by the predicted target column with name
//             in the format of
//             "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
//             [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>"
//             Subsequent lines will contain the respective values of
//             successfully predicted rows, with the last, i.e. the target,
//             column having the predicted target value.
//             If prediction for any rows failed, then additional
//             `errors_1.csv`, `errors_2.csv`,..., `errors_N.csv` will be
//             created (N depends on total number of failed rows). These files
//             will have an analogous format to `tables_*.csv`, but always with
//             a single target column having
//             [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//             represented as a JSON string, and containing only `code` and
//             `message`.
//         BigQuery case:
//           [bigquery_destination][google.cloud.automl.v1p1beta.OutputConfig.bigquery_destination]
//           pointing to a BigQuery project must be set. In the given project a
//           new dataset will be created with name
//           `prediction_<model-display-name>_<timestamp-of-prediction-call>`
//           where <model-display-name> will be made
//           BigQuery-dataset-name compatible (e.g. most special characters will
//           become underscores), and timestamp will be in
//           YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset
//           two tables will be created, `predictions`, and `errors`.
//           The `predictions` table's column names will be the input columns'
//           [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
//           followed by the target column with name in the format of
//           "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
//           [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>"
//           The input feature columns will contain the respective values of
//           successfully predicted rows, with the target column having an
//           ARRAY of
//           [AnnotationPayloads][google.cloud.automl.v1p1beta.AnnotationPayload],
//           represented as STRUCT-s, containing
//           [TablesAnnotation][google.cloud.automl.v1p1beta.TablesAnnotation].
//           The `errors` table contains rows for which the prediction has
//           failed; it has analogous input columns while the target column name
//           is in the format of
//           "errors_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
//           [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>",
//           and as a value has
//           [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//           represented as a STRUCT, and containing only `code` and `message`.
message BatchPredictOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location of the directory where the
    // output is to be written.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
  }
}
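
// A minimal client-side sketch of a BatchPredict call wiring together the
// input and output configurations above (Python, assuming the
// `google-cloud-automl` client library; the project, location, and model IDs
// are placeholders):
//
//     from google.cloud import automl_v1
//
//     client = automl_v1.PredictionServiceClient()
//     model_name = client.model_path("PROJECT_ID", "us-central1", "MODEL_ID")
//     input_config = automl_v1.BatchPredictInputConfig(
//         gcs_source=automl_v1.GcsSource(input_uris=["gs://folder/batch.csv"])
//     )
//     output_config = automl_v1.BatchPredictOutputConfig(
//         gcs_destination=automl_v1.GcsDestination(output_uri_prefix="gs://folder/out/")
//     )
//     # BatchPredict is a long-running operation; block until it completes.
//     operation = client.batch_predict(
//         name=model_name, input_config=input_config, output_config=output_config
//     )
//     operation.result()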

// Output configuration for ModelExport Action.
message ModelExportOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location where the model is to be written.
    // This location may only be set for the following model formats:
    //   "tflite", "edgetpu_tflite", "tf_saved_model", "tf_js", "core_ml".
    //
    //  Under the directory given as the destination a new one with name
    //  "model-export-<model-display-name>-<timestamp-of-export-call>",
    //  where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format,
    //  will be created. Inside it, the model and any of its supporting files
    //  will be written.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // The format in which the model must be exported. The available, and default,
  // formats depend on the problem and model type (if a given problem and type
  // combination doesn't have a format listed, its models are not
  // exportable):
  //
  // *  For Image Classification mobile-low-latency-1, mobile-versatile-1,
  //        mobile-high-accuracy-1:
  //      "tflite" (default), "edgetpu_tflite", "tf_saved_model", "tf_js",
  //      "docker".
  //
  // *  For Image Classification mobile-core-ml-low-latency-1,
  //        mobile-core-ml-versatile-1, mobile-core-ml-high-accuracy-1:
  //      "core_ml" (default).
  //
  // *  For Image Object Detection mobile-low-latency-1, mobile-versatile-1,
  //        mobile-high-accuracy-1:
  //      "tflite", "tf_saved_model", "tf_js".
  //
  // Formats description:
  //
  // * tflite - Used for Android mobile devices.
  // * edgetpu_tflite - Used for [Edge TPU](https://cloud.google.com/edge-tpu/)
  //                    devices.
  // * tf_saved_model - A TensorFlow model in SavedModel format.
  // * tf_js - A [TensorFlow.js](https://www.tensorflow.org/js) model that can
  //           be used in the browser and in Node.js using JavaScript.
  // * docker - Used for Docker containers. Use the params field to customize
  //            the container. The container is verified to work correctly on
  //            the Ubuntu 16.04 operating system. See more at the
  //            [containers
  //            quickstart](https://cloud.google.com/vision/automl/docs/containers-gcs-quickstart).
  // * core_ml - Used for iOS mobile devices.
  string model_format = 4;

  // Additional model-type and format specific parameters describing the
  // requirements for the model files to be exported, any string must be up to
  // 25000 characters long.
  //
  //  * For `docker` format:
  //     `cpu_architecture` - (string) "x86_64" (default).
  //     `gpu_architecture` - (string) "none" (default), "nvidia".
  map<string, string> params = 2;
}
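
// A minimal client-side sketch of calling ExportModel with this
// ModelExportOutputConfig (Python, assuming the `google-cloud-automl` client
// library; the project, location, and model IDs are placeholders):
//
//     from google.cloud import automl_v1
//
//     client = automl_v1.AutoMlClient()
//     model_name = client.model_path("PROJECT_ID", "us-central1", "MODEL_ID")
//     output_config = automl_v1.ModelExportOutputConfig(
//         gcs_destination=automl_v1.GcsDestination(output_uri_prefix="gs://folder/model/"),
//         model_format="tflite",
//     )
//     request = automl_v1.ExportModelRequest(name=model_name, output_config=output_config)
//     # ExportModel is a long-running operation; block until it completes.
//     operation = client.export_model(request=request)
//     operation.result()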

// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Google Cloud Storage URIs to input files, up to 2000
  // characters long. Accepted forms:
  // * Full object path, e.g. gs://bucket/directory/object.csv
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];
}

// The Google Cloud Storage location where the output is to be written.
message GcsDestination {
  // Required. Google Cloud Storage URI to the output directory, up to 2000
  // characters long.
  // Accepted forms:
  // * Prefix path: gs://bucket/directory
  // The requesting user must have write permission to the bucket.
  // The directory is created if it doesn't exist.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}
