1// Copyright 2021 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15syntax = "proto3"; 16 17package google.cloud.automl.v1; 18 19import "google/api/field_behavior.proto"; 20 21option csharp_namespace = "Google.Cloud.AutoML.V1"; 22option go_package = "cloud.google.com/go/automl/apiv1/automlpb;automlpb"; 23option java_multiple_files = true; 24option java_package = "com.google.cloud.automl.v1"; 25option php_namespace = "Google\\Cloud\\AutoMl\\V1"; 26option ruby_package = "Google::Cloud::AutoML::V1"; 27 28// Input configuration for [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData] action. 29// 30// The format of input depends on dataset_metadata the Dataset into which 31// the import is happening has. As input source the 32// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source] 33// is expected, unless specified otherwise. Additionally any input .CSV file 34// by itself must be 100MB or smaller, unless specified otherwise. 35// If an "example" file (that is, image, video etc.) with identical content 36// (even if it had different `GCS_FILE_PATH`) is mentioned multiple times, then 37// its label, bounding boxes etc. are appended. The same file should be always 38// provided with the same `ML_USE` and `GCS_FILE_PATH`, if it is not, then 39// these values are nondeterministically selected from the given ones. 40// 41// The formats are represented in EBNF with commas being literal and with 42// non-terminal symbols defined near the end of this comment. The formats are: 43// 44// <h4>AutoML Vision</h4> 45// 46// 47// <div class="ds-selector-tabs"><section><h5>Classification</h5> 48// 49// See [Preparing your training 50// data](https://cloud.google.com/vision/automl/docs/prepare) for more 51// information. 52// 53// CSV file(s) with each line in format: 54// 55// ML_USE,GCS_FILE_PATH,LABEL,LABEL,... 56// 57// * `ML_USE` - Identifies the data set that the current row (file) applies 58// to. 59// This value can be one of the following: 60// * `TRAIN` - Rows in this file are used to train the model. 61// * `TEST` - Rows in this file are used to test the model during training. 62// * `UNASSIGNED` - Rows in this file are not categorized. They are 63// Automatically divided into train and test data. 80% for training and 64// 20% for testing. 65// 66// * `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to 67// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG, .WEBP, .BMP, 68// .TIFF, .ICO. 69// 70// * `LABEL` - A label that identifies the object in the image. 71// 72// For the `MULTICLASS` classification type, at most one `LABEL` is allowed 73// per image. If an image has not yet been labeled, then it should be 74// mentioned just once with no `LABEL`. 
//
//   Some sample rows:
//
//       TRAIN,gs://folder/image1.jpg,daisy
//       TEST,gs://folder/image2.jpg,dandelion,tulip,rose
//       UNASSIGNED,gs://folder/image3.jpg,daisy
//       UNASSIGNED,gs://folder/image4.jpg
//
//
// </section><section><h5>Object Detection</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/vision/automl/object-detection/docs/prepare)
// for more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH,[LABEL],(BOUNDING_BOX | ,,,,,,,)
//
// *  `ML_USE` - Identifies the data set that the current row (file) applies
//    to. This value can be one of the following:
//    * `TRAIN` - Rows in this file are used to train the model.
//    * `TEST` - Rows in this file are used to test the model during training.
//    * `UNASSIGNED` - Rows in this file are not categorized. They are
//      automatically divided into train and test data: 80% for training and
//      20% for testing.
//
// *  `GCS_FILE_PATH` - The Google Cloud Storage location of an image of up to
//    30MB in size. Supported extensions: .JPEG, .GIF, .PNG. Each image
//    is assumed to be exhaustively labeled.
//
// *  `LABEL` - A label that identifies the object in the image specified by
//    the `BOUNDING_BOX`.
//
// *  `BOUNDING_BOX` - The vertices of an object in the example image.
//    The minimum allowed `BOUNDING_BOX` edge length is 0.01, and no more than
//    500 `BOUNDING_BOX` instances per image are allowed (one `BOUNDING_BOX`
//    per line). If an image has no objects of interest, then it should be
//    mentioned just once with no `LABEL` and ",,,,,,," in place of the
//    `BOUNDING_BOX`.
//
// **Four sample rows:**
//
//     TRAIN,gs://folder/image1.png,car,0.1,0.1,,,0.3,0.3,,
//     TRAIN,gs://folder/image1.png,bike,.7,.6,,,.8,.9,,
//     UNASSIGNED,gs://folder/im2.png,car,0.1,0.1,0.2,0.1,0.2,0.3,0.1,0.3
//     TEST,gs://folder/im3.png,,,,,,,,,
// </section>
// </div>
//
//
// <h4>AutoML Video Intelligence</h4>
//
//
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// See [Preparing your training
// data](https://cloud.google.com/video-intelligence/automl/docs/prepare) for
// more information.
//
// CSV file(s) with each line in format:
//
//     ML_USE,GCS_FILE_PATH
//
// For `ML_USE`, do not use `VALIDATE`.
//
// `GCS_FILE_PATH` is the path to another .csv file that describes training
// examples for a given `ML_USE`, using the following row format:
//
//     GCS_FILE_PATH,(LABEL,TIME_SEGMENT_START,TIME_SEGMENT_END | ,,)
//
// Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up
// to 3h in duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time. Any
// segment of a video which has one or more labels on it is considered a
// hard negative for all other labels. Any segment with no labels on
// it is considered to be unknown. If a whole video is unknown, then
// it should be mentioned just once with ",," in place of `LABEL,
// TIME_SEGMENT_START,TIME_SEGMENT_END`.
156// 157// Sample top level CSV file: 158// 159// TRAIN,gs://folder/train_videos.csv 160// TEST,gs://folder/test_videos.csv 161// UNASSIGNED,gs://folder/other_videos.csv 162// 163// Sample rows of a CSV file for a particular ML_USE: 164// 165// gs://folder/video1.avi,car,120,180.000021 166// gs://folder/video1.avi,bike,150,180.000021 167// gs://folder/vid2.avi,car,0,60.5 168// gs://folder/vid3.avi,,, 169// 170// 171// 172// </section><section><h5>Object Tracking</h5> 173// 174// See [Preparing your training 175// data](/video-intelligence/automl/object-tracking/docs/prepare) for more 176// information. 177// 178// CSV file(s) with each line in format: 179// 180// ML_USE,GCS_FILE_PATH 181// 182// For `ML_USE`, do not use `VALIDATE`. 183// 184// `GCS_FILE_PATH` is the path to another .csv file that describes training 185// example for a given `ML_USE`, using the following row format: 186// 187// GCS_FILE_PATH,LABEL,[INSTANCE_ID],TIMESTAMP,BOUNDING_BOX 188// 189// or 190// 191// GCS_FILE_PATH,,,,,,,,,, 192// 193// Here `GCS_FILE_PATH` leads to a video of up to 50GB in size and up 194// to 3h duration. Supported extensions: .MOV, .MPEG4, .MP4, .AVI. 195// Providing `INSTANCE_ID`s can help to obtain a better model. When 196// a specific labeled entity leaves the video frame, and shows up 197// afterwards it is not required, albeit preferable, that the same 198// `INSTANCE_ID` is given to it. 199// 200// `TIMESTAMP` must be within the length of the video, the 201// `BOUNDING_BOX` is assumed to be drawn on the closest video's frame 202// to the `TIMESTAMP`. Any mentioned by the `TIMESTAMP` frame is expected 203// to be exhaustively labeled and no more than 500 `BOUNDING_BOX`-es per 204// frame are allowed. If a whole video is unknown, then it should be 205// mentioned just once with ",,,,,,,,,," in place of `LABEL, 206// [INSTANCE_ID],TIMESTAMP,BOUNDING_BOX`. 207// 208// Sample top level CSV file: 209// 210// TRAIN,gs://folder/train_videos.csv 211// TEST,gs://folder/test_videos.csv 212// UNASSIGNED,gs://folder/other_videos.csv 213// 214// Seven sample rows of a CSV file for a particular ML_USE: 215// 216// gs://folder/video1.avi,car,1,12.10,0.8,0.8,0.9,0.8,0.9,0.9,0.8,0.9 217// gs://folder/video1.avi,car,1,12.90,0.4,0.8,0.5,0.8,0.5,0.9,0.4,0.9 218// gs://folder/video1.avi,car,2,12.10,.4,.2,.5,.2,.5,.3,.4,.3 219// gs://folder/video1.avi,car,2,12.90,.8,.2,,,.9,.3,, 220// gs://folder/video1.avi,bike,,12.50,.45,.45,,,.55,.55,, 221// gs://folder/video2.avi,car,1,0,.1,.9,,,.9,.1,, 222// gs://folder/video2.avi,,,,,,,,,,, 223// </section> 224// </div> 225// 226// 227// <h4>AutoML Natural Language</h4> 228// 229// 230// <div class="ds-selector-tabs"><section><h5>Entity Extraction</h5> 231// 232// See [Preparing your training 233// data](/natural-language/automl/entity-analysis/docs/prepare) for more 234// information. 235// 236// One or more CSV file(s) with each line in the following format: 237// 238// ML_USE,GCS_FILE_PATH 239// 240// * `ML_USE` - Identifies the data set that the current row (file) applies 241// to. 242// This value can be one of the following: 243// * `TRAIN` - Rows in this file are used to train the model. 244// * `TEST` - Rows in this file are used to test the model during training. 245// * `UNASSIGNED` - Rows in this file are not categorized. They are 246// Automatically divided into train and test data. 80% for training and 247// 20% for testing.. 
//
// *  `GCS_FILE_PATH` - Identifies a JSON Lines (.JSONL) file stored in
//    Google Cloud Storage that contains in-line text as documents
//    for model training.
//
// After the training data set has been determined from the `TRAIN` and
// `UNASSIGNED` CSV files, the training data is divided into train and
// validation data sets: 70% for training and 30% for validation.
//
// For example:
//
//     TRAIN,gs://folder/file1.jsonl
//     VALIDATE,gs://folder/file2.jsonl
//     TEST,gs://folder/file3.jsonl
//
// **In-line JSONL files**
//
// In-line .JSONL files contain, per line, a JSON document that wraps a
// [`text_snippet`][google.cloud.automl.v1.TextSnippet] field followed by
// one or more [`annotations`][google.cloud.automl.v1.AnnotationPayload]
// fields, which have `display_name` and `text_extraction` fields to describe
// the entity from the text snippet. Multiple JSON documents can be separated
// using line breaks (\n).
//
// The supplied text must be annotated exhaustively. For example, if you
// include the text "horse", but do not label it as "animal",
// then "horse" is assumed to not be an "animal".
//
// Any given text snippet content must have 30,000 characters or
// less, and also be UTF-8 NFC encoded. ASCII is accepted as it is
// UTF-8 NFC encoded.
//
// For example:
//
//     {
//       "text_snippet": {
//         "content": "dog car cat"
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 0, "end_offset": 2}
//           }
//         },
//         {
//           "display_name": "vehicle",
//           "text_extraction": {
//             "text_segment": {"start_offset": 4, "end_offset": 6}
//           }
//         },
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 8, "end_offset": 10}
//           }
//         }
//       ]
//     }\n
//     {
//       "text_snippet": {
//         "content": "This dog is good."
//       },
//       "annotations": [
//         {
//           "display_name": "animal",
//           "text_extraction": {
//             "text_segment": {"start_offset": 5, "end_offset": 7}
//           }
//         }
//       ]
//     }
//
// **JSONL files that reference documents**
//
// .JSONL files contain, per line, a JSON document that wraps an
// `input_config` that contains the path to a source document.
// Multiple JSON documents can be separated using line breaks (\n).
//
// Supported document extensions: .PDF, .TIF, .TIFF
//
// For example:
//
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ]
//           }
//         }
//       }
//     }\n
//     {
//       "document": {
//         "input_config": {
//           "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ]
//           }
//         }
//       }
//     }
//
// **In-line JSONL files with document layout information**
//
// **Note:** You can only annotate documents using the UI. The format described
// below applies to annotated documents exported using the UI or `exportData`.
//
// In-line .JSONL files for documents contain, per line, a JSON document
// that wraps a `document` field that provides the textual content of the
// document and the layout information.
356// 357// For example: 358// 359// { 360// "document": { 361// "document_text": { 362// "content": "dog car cat" 363// } 364// "layout": [ 365// { 366// "text_segment": { 367// "start_offset": 0, 368// "end_offset": 11, 369// }, 370// "page_number": 1, 371// "bounding_poly": { 372// "normalized_vertices": [ 373// {"x": 0.1, "y": 0.1}, 374// {"x": 0.1, "y": 0.3}, 375// {"x": 0.3, "y": 0.3}, 376// {"x": 0.3, "y": 0.1}, 377// ], 378// }, 379// "text_segment_type": TOKEN, 380// } 381// ], 382// "document_dimensions": { 383// "width": 8.27, 384// "height": 11.69, 385// "unit": INCH, 386// } 387// "page_count": 3, 388// }, 389// "annotations": [ 390// { 391// "display_name": "animal", 392// "text_extraction": { 393// "text_segment": {"start_offset": 0, "end_offset": 3} 394// } 395// }, 396// { 397// "display_name": "vehicle", 398// "text_extraction": { 399// "text_segment": {"start_offset": 4, "end_offset": 7} 400// } 401// }, 402// { 403// "display_name": "animal", 404// "text_extraction": { 405// "text_segment": {"start_offset": 8, "end_offset": 11} 406// } 407// }, 408// ], 409// 410// 411// 412// 413// </section><section><h5>Classification</h5> 414// 415// See [Preparing your training 416// data](https://cloud.google.com/natural-language/automl/docs/prepare) for more 417// information. 418// 419// One or more CSV file(s) with each line in the following format: 420// 421// ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),LABEL,LABEL,... 422// 423// * `ML_USE` - Identifies the data set that the current row (file) applies 424// to. 425// This value can be one of the following: 426// * `TRAIN` - Rows in this file are used to train the model. 427// * `TEST` - Rows in this file are used to test the model during training. 428// * `UNASSIGNED` - Rows in this file are not categorized. They are 429// Automatically divided into train and test data. 80% for training and 430// 20% for testing. 431// 432// * `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If 433// the column content is a valid Google Cloud Storage file path, that is, 434// prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if 435// the content is enclosed in double quotes (""), it is treated as a 436// `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a 437// file with supported extension and UTF-8 encoding, for example, 438// "gs://folder/content.txt" AutoML imports the file content 439// as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content 440// excluding quotes. In both cases, size of the content must be 10MB or 441// less in size. For zip files, the size of each file inside the zip must be 442// 10MB or less in size. 443// 444// For the `MULTICLASS` classification type, at most one `LABEL` is allowed. 445// 446// The `ML_USE` and `LABEL` columns are optional. 447// Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP 448// 449// A maximum of 100 unique labels are allowed per CSV row. 450// 451// Sample rows: 452// 453// TRAIN,"They have bad food and very rude",RudeService,BadFood 454// gs://folder/content.txt,SlowService 455// TEST,gs://folder/document.pdf 456// VALIDATE,gs://folder/text_files.zip,BadFood 457// 458// 459// 460// </section><section><h5>Sentiment Analysis</h5> 461// 462// See [Preparing your training 463// data](https://cloud.google.com/natural-language/automl/docs/prepare) for more 464// information. 
465// 466// CSV file(s) with each line in format: 467// 468// ML_USE,(TEXT_SNIPPET | GCS_FILE_PATH),SENTIMENT 469// 470// * `ML_USE` - Identifies the data set that the current row (file) applies 471// to. 472// This value can be one of the following: 473// * `TRAIN` - Rows in this file are used to train the model. 474// * `TEST` - Rows in this file are used to test the model during training. 475// * `UNASSIGNED` - Rows in this file are not categorized. They are 476// Automatically divided into train and test data. 80% for training and 477// 20% for testing. 478// 479// * `TEXT_SNIPPET` and `GCS_FILE_PATH` are distinguished by a pattern. If 480// the column content is a valid Google Cloud Storage file path, that is, 481// prefixed by "gs://", it is treated as a `GCS_FILE_PATH`. Otherwise, if 482// the content is enclosed in double quotes (""), it is treated as a 483// `TEXT_SNIPPET`. For `GCS_FILE_PATH`, the path must lead to a 484// file with supported extension and UTF-8 encoding, for example, 485// "gs://folder/content.txt" AutoML imports the file content 486// as a text snippet. For `TEXT_SNIPPET`, AutoML imports the column content 487// excluding quotes. In both cases, size of the content must be 128kB or 488// less in size. For zip files, the size of each file inside the zip must be 489// 128kB or less in size. 490// 491// The `ML_USE` and `SENTIMENT` columns are optional. 492// Supported file extensions: .TXT, .PDF, .TIF, .TIFF, .ZIP 493// 494// * `SENTIMENT` - An integer between 0 and 495// Dataset.text_sentiment_dataset_metadata.sentiment_max 496// (inclusive). Describes the ordinal of the sentiment - higher 497// value means a more positive sentiment. All the values are 498// completely relative, i.e. neither 0 needs to mean a negative or 499// neutral sentiment nor sentiment_max needs to mean a positive one - 500// it is just required that 0 is the least positive sentiment 501// in the data, and sentiment_max is the most positive one. 502// The SENTIMENT shouldn't be confused with "score" or "magnitude" 503// from the previous Natural Language Sentiment Analysis API. 504// All SENTIMENT values between 0 and sentiment_max must be 505// represented in the imported data. On prediction the same 0 to 506// sentiment_max range will be used. The difference between 507// neighboring sentiment values needs not to be uniform, e.g. 1 and 508// 2 may be similar whereas the difference between 2 and 3 may be 509// large. 510// 511// Sample rows: 512// 513// TRAIN,"@freewrytin this is way too good for your product",2 514// gs://folder/content.txt,3 515// TEST,gs://folder/document.pdf 516// VALIDATE,gs://folder/text_files.zip,2 517// </section> 518// </div> 519// 520// 521// 522// <h4>AutoML Tables</h4><div class="ui-datasection-main"><section 523// class="selected"> 524// 525// See [Preparing your training 526// data](https://cloud.google.com/automl-tables/docs/prepare) for more 527// information. 528// 529// You can use either 530// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source] or 531// [bigquery_source][google.cloud.automl.v1.InputConfig.bigquery_source]. 532// All input is concatenated into a 533// single 534// [primary_table_spec_id][google.cloud.automl.v1.TablesDatasetMetadata.primary_table_spec_id] 535// 536// **For gcs_source:** 537// 538// CSV file(s), where the first row of the first file is the header, 539// containing unique column names. If the first row of a subsequent 540// file is the same as the header, then it is also treated as a 541// header. 
All other rows contain values for the corresponding
// columns.
//
// Each .CSV file by itself must be 10GB or smaller, and their total
// size must be 100GB or smaller.
//
// First three sample rows of a CSV file:
// <pre>
// "Id","First Name","Last Name","Dob","Addresses"
// "1","John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]"
// "2","Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]"
// </pre>
// **For bigquery_source:**
//
// A URI of a BigQuery table. The user data size of the BigQuery
// table must be 100GB or smaller.
//
// An imported table must have between 2 and 1,000 columns, inclusive,
// and between 1,000 and 100,000,000 rows, inclusive. At most 5 import
// data operations can run in parallel.
//
// </section>
// </div>
//
//
// **Input field definitions:**
//
// `ML_USE`
// : ("TRAIN" | "VALIDATE" | "TEST" | "UNASSIGNED")
//   Describes how the given example (file) should be used for model
//   training. "UNASSIGNED" can be used when the user has no preference.
//
// `GCS_FILE_PATH`
// : The path to a file on Google Cloud Storage. For example,
//   "gs://folder/image1.png".
//
// `LABEL`
// : A display name of an object on an image, video etc., e.g. "dog".
//   Must be up to 32 characters long and can consist only of ASCII
//   Latin letters A-Z and a-z, underscores (_), and ASCII digits 0-9.
//   For each label, an AnnotationSpec is created whose display_name
//   becomes the label; AnnotationSpecs are given back in predictions.
//
// `INSTANCE_ID`
// : A positive integer that identifies a specific instance of a
//   labeled entity on an example. Used e.g. to track two cars on
//   a video while being able to tell apart which one is which.
//
// `BOUNDING_BOX`
// : (`VERTEX,VERTEX,VERTEX,VERTEX` | `VERTEX,,,VERTEX,,`)
//   A rectangle parallel to the frame of the example (image,
//   video). If 4 vertices are given, they are connected by edges
//   in the order provided; if 2 are given, they are recognized
//   as diagonally opposite vertices of the rectangle.
//
// `VERTEX`
// : (`COORDINATE,COORDINATE`)
//   First coordinate is horizontal (x), the second is vertical (y).
//
// `COORDINATE`
// : A float in 0 to 1 range, relative to total length of
//   image or video in given dimension. For fractions the
//   leading non-decimal 0 can be omitted (e.g. 0.3 = .3).
//   Point 0,0 is in top left.
//
// `TIME_SEGMENT_START`
// : (`TIME_OFFSET`)
//   Expresses a beginning, inclusive, of a time segment
//   within an example that has a time dimension
//   (e.g. video).
//
// `TIME_SEGMENT_END`
// : (`TIME_OFFSET`)
//   Expresses an end, exclusive, of a time segment within
//   an example that has a time dimension (e.g. video).
//
// `TIME_OFFSET`
// : A number of seconds as measured from the start of an
//   example (e.g. video). Fractions are allowed, up to a
//   microsecond precision. "inf" is allowed, and it means the end
//   of the example.
//
// `TEXT_SNIPPET`
// : The content of a text snippet, UTF-8 encoded, enclosed within
//   double quotes ("").
//
// `DOCUMENT`
// : A field that provides the textual content of the document and the
//   layout information.
//
//
// **Errors:**
//
// If any of the provided CSV files can't be parsed or if more than a certain
// percent of CSV rows cannot be processed, then the operation fails and
// nothing is imported. Regardless of overall success or failure, the per-row
// failures, up to a certain count cap, are listed in
// Operation.metadata.partial_failures.
//
message InputConfig {
  // The source of the input.
  oneof source {
    // The Google Cloud Storage location for the input content.
    // For [AutoMl.ImportData][google.cloud.automl.v1.AutoMl.ImportData], `gcs_source` points to a CSV file with
    // a structure described in [InputConfig][google.cloud.automl.v1.InputConfig].
    GcsSource gcs_source = 1;
  }

  // Additional domain-specific parameters describing the semantics of the
  // imported data; any string must be up to 25000
  // characters long.
  //
  // <h4>AutoML Tables</h4>
  //
  // `schema_inference_version`
  // : (integer) This value must be supplied.
  //   The version of the
  //   algorithm to use for the initial inference of the
  //   column data types of the imported table. Allowed values: "1".
  map<string, string> params = 2;
}
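
// A minimal usage sketch for the `InputConfig` message above, written against
// the `google-cloud-automl` Python client library. This sketch is an
// illustrative assumption rather than part of the API definition; the
// project, location, dataset ID, and CSV path are placeholders:
//
//     from google.cloud import automl_v1
//
//     client = automl_v1.AutoMlClient()
//     # Full resource name of an existing dataset (placeholder IDs).
//     dataset_name = client.dataset_path("my-project", "us-central1", "TCN1234567890")
//     input_config = automl_v1.InputConfig(
//         gcs_source=automl_v1.GcsSource(
//             input_uris=["gs://my-bucket/import/data.csv"]
//         ),
//     )
//     # ImportData is a long-running operation; block until it completes.
//     operation = client.import_data(name=dataset_name, input_config=input_config)
//     operation.result()
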
// Input configuration for BatchPredict Action.
//
// The format of input depends on the ML problem of the model used for
// prediction. As input source the
// [gcs_source][google.cloud.automl.v1.InputConfig.gcs_source]
// is expected, unless specified otherwise.
//
// The formats are represented in EBNF with commas being literal and with
// non-terminal symbols defined near the end of this comment. The formats
// are:
//
// <h4>AutoML Vision</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// The Google Cloud Storage location of an image of up to
// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
// This path is treated as the ID in the batch predict output.
//
// Sample rows:
//
//     gs://folder/image1.jpeg
//     gs://folder/image2.gif
//     gs://folder/image3.png
//
// </section><section><h5>Object Detection</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// The Google Cloud Storage location of an image of up to
// 30MB in size. Supported extensions: .JPEG, .GIF, .PNG.
// This path is treated as the ID in the batch predict output.
//
// Sample rows:
//
//     gs://folder/image1.jpeg
//     gs://folder/image2.gif
//     gs://folder/image3.png
// </section>
// </div>
//
// <h4>AutoML Video Intelligence</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of video up to 50GB in
// size and up to 3h in duration.
// Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time.
//
// Sample rows:
//
//     gs://folder/video1.mp4,10,40
//     gs://folder/video1.mp4,20,60
//     gs://folder/vid2.mov,0,inf
//
// </section><section><h5>Object Tracking</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of video up to 50GB in
// size and up to 3h in duration.
// Supported extensions: .MOV, .MPEG4, .MP4, .AVI.
//
// `TIME_SEGMENT_START` and `TIME_SEGMENT_END` must be within the
// length of the video, and the end time must be after the start time.
//
// Sample rows:
//
//     gs://folder/video1.mp4,10,40
//     gs://folder/video1.mp4,20,60
//     gs://folder/vid2.mov,0,inf
// </section>
// </div>
//
// <h4>AutoML Natural Language</h4>
// <div class="ds-selector-tabs"><section><h5>Classification</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF
//
// Text files can be no larger than 10MB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//     gs://folder/text3.tif
//
// </section><section><h5>Sentiment Analysis</h5>
//
// One or more CSV files where each line is a single column:
//
//     GCS_FILE_PATH
//
// `GCS_FILE_PATH` is the Google Cloud Storage location of a text file.
// Supported file extensions: .TXT, .PDF, .TIF, .TIFF
//
// Text files can be no larger than 128kB in size.
//
// Sample rows:
//
//     gs://folder/text1.txt
//     gs://folder/text2.pdf
//     gs://folder/text3.tif
//
// </section><section><h5>Entity Extraction</h5>
//
// One or more JSONL (JSON Lines) files that either provide inline text or
// documents. You can only use one format, either inline text or documents,
// for a single call to [AutoMl.BatchPredict].
//
// Each JSONL file contains, per line, a proto that
// wraps a temporary user-assigned TextSnippet ID (string up to 2000
// characters long) called "id", a TextSnippet proto (in
// JSON representation) and zero or more TextFeature protos. Any given
// text snippet content must have 30,000 characters or less, and also
// be UTF-8 NFC encoded (ASCII already is). The IDs provided should be
// unique.
//
// Each document JSONL file contains, per line, a proto that wraps a Document
// proto with `input_config` set. Each document cannot exceed 2MB in size.
//
// Supported document extensions: .PDF, .TIF, .TIFF
//
// Each JSONL file must not exceed 100MB in size, and no more than 20
// JSONL files may be passed.
//
// Sample inline JSONL file (Shown with artificial line
// breaks.
Actual line breaks are denoted by "\n".): 808// 809// { 810// "id": "my_first_id", 811// "text_snippet": { "content": "dog car cat"}, 812// "text_features": [ 813// { 814// "text_segment": {"start_offset": 4, "end_offset": 6}, 815// "structural_type": PARAGRAPH, 816// "bounding_poly": { 817// "normalized_vertices": [ 818// {"x": 0.1, "y": 0.1}, 819// {"x": 0.1, "y": 0.3}, 820// {"x": 0.3, "y": 0.3}, 821// {"x": 0.3, "y": 0.1}, 822// ] 823// }, 824// } 825// ], 826// }\n 827// { 828// "id": "2", 829// "text_snippet": { 830// "content": "Extended sample content", 831// "mime_type": "text/plain" 832// } 833// } 834// 835// Sample document JSONL file (Shown with artificial line 836// breaks. Actual line breaks are denoted by "\n".): 837// 838// { 839// "document": { 840// "input_config": { 841// "gcs_source": { "input_uris": [ "gs://folder/document1.pdf" ] 842// } 843// } 844// } 845// }\n 846// { 847// "document": { 848// "input_config": { 849// "gcs_source": { "input_uris": [ "gs://folder/document2.tif" ] 850// } 851// } 852// } 853// } 854// </section> 855// </div> 856// 857// <h4>AutoML Tables</h4><div class="ui-datasection-main"><section 858// class="selected"> 859// 860// See [Preparing your training 861// data](https://cloud.google.com/automl-tables/docs/predict-batch) for more 862// information. 863// 864// You can use either 865// [gcs_source][google.cloud.automl.v1.BatchPredictInputConfig.gcs_source] 866// or 867// [bigquery_source][BatchPredictInputConfig.bigquery_source]. 868// 869// **For gcs_source:** 870// 871// CSV file(s), each by itself 10GB or smaller and total size must be 872// 100GB or smaller, where first file must have a header containing 873// column names. If the first row of a subsequent file is the same as 874// the header, then it is also treated as a header. All other rows 875// contain values for the corresponding columns. 876// 877// The column names must contain the model's 878// [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs] 879// [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name] 880// (order doesn't matter). The columns corresponding to the model's 881// input feature column specs must contain values compatible with the 882// column spec's data types. Prediction on all the rows, i.e. the CSV 883// lines, will be attempted. 884// 885// 886// Sample rows from a CSV file: 887// <pre> 888// "First Name","Last Name","Dob","Addresses" 889// "John","Doe","1968-01-22","[{"status":"current","address":"123_First_Avenue","city":"Seattle","state":"WA","zip":"11111","numberOfYears":"1"},{"status":"previous","address":"456_Main_Street","city":"Portland","state":"OR","zip":"22222","numberOfYears":"5"}]" 890// "Jane","Doe","1980-10-16","[{"status":"current","address":"789_Any_Avenue","city":"Albany","state":"NY","zip":"33333","numberOfYears":"2"},{"status":"previous","address":"321_Main_Street","city":"Hoboken","state":"NJ","zip":"44444","numberOfYears":"3"}]} 891// </pre> 892// **For bigquery_source:** 893// 894// The URI of a BigQuery table. The user data size of the BigQuery 895// table must be 100GB or smaller. 896// 897// The column names must contain the model's 898// [input_feature_column_specs'][google.cloud.automl.v1.TablesModelMetadata.input_feature_column_specs] 899// [display_name-s][google.cloud.automl.v1.ColumnSpec.display_name] 900// (order doesn't matter). 
The columns corresponding to the model's 901// input feature column specs must contain values compatible with the 902// column spec's data types. Prediction on all the rows of the table 903// will be attempted. 904// </section> 905// </div> 906// 907// **Input field definitions:** 908// 909// `GCS_FILE_PATH` 910// : The path to a file on Google Cloud Storage. For example, 911// "gs://folder/video.avi". 912// 913// `TIME_SEGMENT_START` 914// : (`TIME_OFFSET`) 915// Expresses a beginning, inclusive, of a time segment 916// within an example that has a time dimension 917// (e.g. video). 918// 919// `TIME_SEGMENT_END` 920// : (`TIME_OFFSET`) 921// Expresses an end, exclusive, of a time segment within 922// n example that has a time dimension (e.g. video). 923// 924// `TIME_OFFSET` 925// : A number of seconds as measured from the start of an 926// example (e.g. video). Fractions are allowed, up to a 927// microsecond precision. "inf" is allowed, and it means the end 928// of the example. 929// 930// **Errors:** 931// 932// If any of the provided CSV files can't be parsed or if more than certain 933// percent of CSV rows cannot be processed then the operation fails and 934// prediction does not happen. Regardless of overall success or failure the 935// per-row failures, up to a certain count cap, will be listed in 936// Operation.metadata.partial_failures. 937message BatchPredictInputConfig { 938 // The source of the input. 939 oneof source { 940 // Required. The Google Cloud Storage location for the input content. 941 GcsSource gcs_source = 1 [(google.api.field_behavior) = REQUIRED]; 942 } 943} 944 945// Input configuration of a [Document][google.cloud.automl.v1.Document]. 946message DocumentInputConfig { 947 // The Google Cloud Storage location of the document file. Only a single path 948 // should be given. 949 // 950 // Max supported size: 512MB. 951 // 952 // Supported extensions: .PDF. 953 GcsSource gcs_source = 1; 954} 955 956// * For Translation: 957// CSV file `translation.csv`, with each line in format: 958// ML_USE,GCS_FILE_PATH 959// GCS_FILE_PATH leads to a .TSV file which describes examples that have 960// given ML_USE, using the following row format per line: 961// TEXT_SNIPPET (in source language) \t TEXT_SNIPPET (in target 962// language) 963// 964// * For Tables: 965// Output depends on whether the dataset was imported from Google Cloud 966// Storage or BigQuery. 967// Google Cloud Storage case: 968// [gcs_destination][google.cloud.automl.v1p1beta.OutputConfig.gcs_destination] 969// must be set. Exported are CSV file(s) `tables_1.csv`, 970// `tables_2.csv`,...,`tables_N.csv` with each having as header line 971// the table's column names, and all other lines contain values for 972// the header columns. 973// BigQuery case: 974// [bigquery_destination][google.cloud.automl.v1p1beta.OutputConfig.bigquery_destination] 975// pointing to a BigQuery project must be set. In the given project a 976// new dataset will be created with name 977// `export_data_<automl-dataset-display-name>_<timestamp-of-export-call>` 978// where <automl-dataset-display-name> will be made 979// BigQuery-dataset-name compatible (e.g. most special characters will 980// become underscores), and timestamp will be in 981// YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In that 982// dataset a new table called `primary_table` will be created, and 983// filled with precisely the same data as this obtained on import. 984message OutputConfig { 985 // The destination of the output. 
986 oneof destination { 987 // Required. The Google Cloud Storage location where the output is to be written to. 988 // For Image Object Detection, Text Extraction, Video Classification and 989 // Tables, in the given directory a new directory will be created with name: 990 // export_data-<dataset-display-name>-<timestamp-of-export-call> where 991 // timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. All export 992 // output will be written into that directory. 993 GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED]; 994 } 995} 996 997// Output configuration for BatchPredict Action. 998// 999// As destination the 1000// [gcs_destination][google.cloud.automl.v1.BatchPredictOutputConfig.gcs_destination] 1001// must be set unless specified otherwise for a domain. If gcs_destination is 1002// set then in the given directory a new directory is created. Its name 1003// will be 1004// "prediction-<model-display-name>-<timestamp-of-prediction-call>", 1005// where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format. The contents 1006// of it depends on the ML problem the predictions are made for. 1007// 1008// * For Image Classification: 1009// In the created directory files `image_classification_1.jsonl`, 1010// `image_classification_2.jsonl`,...,`image_classification_N.jsonl` 1011// will be created, where N may be 1, and depends on the 1012// total number of the successfully predicted images and annotations. 1013// A single image will be listed only once with all its annotations, 1014// and its annotations will never be split across files. 1015// Each .JSONL file will contain, per line, a JSON representation of a 1016// proto that wraps image's "ID" : "<id_value>" followed by a list of 1017// zero or more AnnotationPayload protos (called annotations), which 1018// have classification detail populated. 1019// If prediction for any image failed (partially or completely), then an 1020// additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl` 1021// files will be created (N depends on total number of failed 1022// predictions). These files will have a JSON representation of a proto 1023// that wraps the same "ID" : "<id_value>" but here followed by 1024// exactly one 1025// [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto) 1026// containing only `code` and `message`fields. 1027// 1028// * For Image Object Detection: 1029// In the created directory files `image_object_detection_1.jsonl`, 1030// `image_object_detection_2.jsonl`,...,`image_object_detection_N.jsonl` 1031// will be created, where N may be 1, and depends on the 1032// total number of the successfully predicted images and annotations. 1033// Each .JSONL file will contain, per line, a JSON representation of a 1034// proto that wraps image's "ID" : "<id_value>" followed by a list of 1035// zero or more AnnotationPayload protos (called annotations), which 1036// have image_object_detection detail populated. A single image will 1037// be listed only once with all its annotations, and its annotations 1038// will never be split across files. 1039// If prediction for any image failed (partially or completely), then 1040// additional `errors_1.jsonl`, `errors_2.jsonl`,..., `errors_N.jsonl` 1041// files will be created (N depends on total number of failed 1042// predictions). 
These files will have a JSON representation of a proto
//       that wraps the same "ID" : "<id_value>" but here followed by
//       exactly one
//       [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//       containing only `code` and `message` fields.
//
//  *  For Video Classification:
//       In the created directory a video_classification.csv file, and a .JSON
//       file for each video classification requested in the input (i.e. each
//       line in given CSV(s)), will be created.
//
//       The format of video_classification.csv is:
//       GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
//       where:
//       GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
//       the prediction input lines (i.e. video_classification.csv has
//       precisely the same number of lines as the prediction input had.)
//       JSON_FILE_NAME = Name of .JSON file in the output directory, which
//       contains prediction responses for the video time segment.
//       STATUS = "OK" if prediction completed successfully, or an error code
//       with message otherwise. If STATUS is not "OK" then the .JSON file
//       for that line may not exist or be empty.
//
//       Each .JSON file, assuming STATUS is "OK", will contain a list of
//       AnnotationPayload protos in JSON format, which are the predictions
//       for the video time segment the file is assigned to in the
//       video_classification.csv. All AnnotationPayload protos will have
//       video_classification field set, and will be sorted by
//       video_classification.type field (note that the returned types are
//       governed by the `classification_types` parameter in
//       [PredictService.BatchPredictRequest.params][]).
//
//  *  For Video Object Tracking:
//       In the created directory a video_object_tracking.csv file will be
//       created, and multiple files video_object_tracking_1.json,
//       video_object_tracking_2.json,..., video_object_tracking_N.json,
//       where N is the number of requests in the input (i.e. the number of
//       lines in given CSV(s)).
//
//       The format of video_object_tracking.csv is:
//       GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END,JSON_FILE_NAME,STATUS
//       where:
//       GCS_FILE_PATH,TIME_SEGMENT_START,TIME_SEGMENT_END = matches 1 to 1
//       the prediction input lines (i.e. video_object_tracking.csv has
//       precisely the same number of lines as the prediction input had.)
//       JSON_FILE_NAME = Name of .JSON file in the output directory, which
//       contains prediction responses for the video time segment.
//       STATUS = "OK" if prediction completed successfully, or an error
//       code with message otherwise. If STATUS is not "OK" then the .JSON
//       file for that line may not exist or be empty.
//
//       Each .JSON file, assuming STATUS is "OK", will contain a list of
//       AnnotationPayload protos in JSON format, which are the predictions
//       for each frame of the video time segment the file is assigned to in
//       video_object_tracking.csv. All AnnotationPayload protos will have
//       video_object_tracking field set.
//
//  *  For Text Classification:
//       In the created directory files `text_classification_1.jsonl`,
//       `text_classification_2.jsonl`,...,`text_classification_N.jsonl`
//       will be created, where N may be 1, and depends on the
//       total number of inputs and annotations found.
1102// 1103// Each .JSONL file will contain, per line, a JSON representation of a 1104// proto that wraps input text file (or document) in 1105// the text snippet (or document) proto and a list of 1106// zero or more AnnotationPayload protos (called annotations), which 1107// have classification detail populated. A single text file (or 1108// document) will be listed only once with all its annotations, and its 1109// annotations will never be split across files. 1110// 1111// If prediction for any input file (or document) failed (partially or 1112// completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,..., 1113// `errors_N.jsonl` files will be created (N depends on total number of 1114// failed predictions). These files will have a JSON representation of a 1115// proto that wraps input file followed by exactly one 1116// [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto) 1117// containing only `code` and `message`. 1118// 1119// * For Text Sentiment: 1120// In the created directory files `text_sentiment_1.jsonl`, 1121// `text_sentiment_2.jsonl`,...,`text_sentiment_N.jsonl` 1122// will be created, where N may be 1, and depends on the 1123// total number of inputs and annotations found. 1124// 1125// Each .JSONL file will contain, per line, a JSON representation of a 1126// proto that wraps input text file (or document) in 1127// the text snippet (or document) proto and a list of 1128// zero or more AnnotationPayload protos (called annotations), which 1129// have text_sentiment detail populated. A single text file (or 1130// document) will be listed only once with all its annotations, and its 1131// annotations will never be split across files. 1132// 1133// If prediction for any input file (or document) failed (partially or 1134// completely), then additional `errors_1.jsonl`, `errors_2.jsonl`,..., 1135// `errors_N.jsonl` files will be created (N depends on total number of 1136// failed predictions). These files will have a JSON representation of a 1137// proto that wraps input file followed by exactly one 1138// [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto) 1139// containing only `code` and `message`. 1140// 1141// * For Text Extraction: 1142// In the created directory files `text_extraction_1.jsonl`, 1143// `text_extraction_2.jsonl`,...,`text_extraction_N.jsonl` 1144// will be created, where N may be 1, and depends on the 1145// total number of inputs and annotations found. 1146// The contents of these .JSONL file(s) depend on whether the input 1147// used inline text, or documents. 1148// If input was inline, then each .JSONL file will contain, per line, 1149// a JSON representation of a proto that wraps given in request text 1150// snippet's "id" (if specified), followed by input text snippet, 1151// and a list of zero or more 1152// AnnotationPayload protos (called annotations), which have 1153// text_extraction detail populated. A single text snippet will be 1154// listed only once with all its annotations, and its annotations will 1155// never be split across files. 
1156// If input used documents, then each .JSONL file will contain, per 1157// line, a JSON representation of a proto that wraps given in request 1158// document proto, followed by its OCR-ed representation in the form 1159// of a text snippet, finally followed by a list of zero or more 1160// AnnotationPayload protos (called annotations), which have 1161// text_extraction detail populated and refer, via their indices, to 1162// the OCR-ed text snippet. A single document (and its text snippet) 1163// will be listed only once with all its annotations, and its 1164// annotations will never be split across files. 1165// If prediction for any text snippet failed (partially or completely), 1166// then additional `errors_1.jsonl`, `errors_2.jsonl`,..., 1167// `errors_N.jsonl` files will be created (N depends on total number of 1168// failed predictions). These files will have a JSON representation of a 1169// proto that wraps either the "id" : "<id_value>" (in case of inline) 1170// or the document proto (in case of document) but here followed by 1171// exactly one 1172// [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto) 1173// containing only `code` and `message`. 1174// 1175// * For Tables: 1176// Output depends on whether 1177// [gcs_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.gcs_destination] 1178// or 1179// [bigquery_destination][google.cloud.automl.v1p1beta.BatchPredictOutputConfig.bigquery_destination] 1180// is set (either is allowed). 1181// Google Cloud Storage case: 1182// In the created directory files `tables_1.csv`, `tables_2.csv`,..., 1183// `tables_N.csv` will be created, where N may be 1, and depends on 1184// the total number of the successfully predicted rows. 1185// For all CLASSIFICATION 1186// [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]: 1187// Each .csv file will contain a header, listing all columns' 1188// [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name] 1189// given on input followed by M target column names in the format of 1190// "<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec] 1191// [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>_<target 1192// value>_score" where M is the number of distinct target values, 1193// i.e. number of distinct values in the target column of the table 1194// used to train the model. Subsequent lines will contain the 1195// respective values of successfully predicted rows, with the last, 1196// i.e. the target, columns having the corresponding prediction 1197// [scores][google.cloud.automl.v1p1beta.TablesAnnotation.score]. 1198// For REGRESSION and FORECASTING 1199// [prediction_type-s][google.cloud.automl.v1p1beta.TablesModelMetadata.prediction_type]: 1200// Each .csv file will contain a header, listing all columns' 1201// [display_name-s][google.cloud.automl.v1p1beta.display_name] 1202// given on input followed by the predicted target column with name 1203// in the format of 1204// "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec] 1205// [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>" 1206// Subsequent lines will contain the respective values of 1207// successfully predicted rows, with the last, i.e. the target, 1208// column having the predicted target value. 
//       If prediction for any rows failed, then an additional
//       `errors_1.csv`, `errors_2.csv`,..., `errors_N.csv` will be
//       created (N depends on total number of failed rows). These files
//       will have an analogous format to `tables_*.csv`, but always with a
//       single target column having
//       [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//       represented as a JSON string, and containing only `code` and
//       `message`.
//     BigQuery case:
//       [bigquery_destination][google.cloud.automl.v1p1beta.OutputConfig.bigquery_destination]
//       pointing to a BigQuery project must be set. In the given project a
//       new dataset will be created with name
//       `prediction_<model-display-name>_<timestamp-of-prediction-call>`
//       where <model-display-name> will be made
//       BigQuery-dataset-name compatible (e.g. most special characters will
//       become underscores), and timestamp will be in
//       YYYY_MM_DDThh_mm_ss_sssZ "based on ISO-8601" format. In the dataset
//       two tables will be created, `predictions`, and `errors`.
//       The `predictions` table's column names will be the input columns'
//       [display_name-s][google.cloud.automl.v1p1beta.ColumnSpec.display_name]
//       followed by the target column with name in the format of
//       "predicted_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
//       [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>"
//       The input feature columns will contain the respective values of
//       successfully predicted rows, with the target column having an
//       ARRAY of
//       [AnnotationPayloads][google.cloud.automl.v1p1beta.AnnotationPayload],
//       represented as STRUCT-s, containing
//       [TablesAnnotation][google.cloud.automl.v1p1beta.TablesAnnotation].
//       The `errors` table contains rows for which the prediction has
//       failed; it has analogous input columns while the target column name
//       is in the format of
//       "errors_<[target_column_specs][google.cloud.automl.v1p1beta.TablesModelMetadata.target_column_spec]
//       [display_name][google.cloud.automl.v1p1beta.ColumnSpec.display_name]>",
//       and as a value has
//       [`google.rpc.Status`](https://github.com/googleapis/googleapis/blob/master/google/rpc/status.proto)
//       represented as a STRUCT, and containing only `code` and `message`.
message BatchPredictOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location of the directory where the output is to
    // be written to.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
  }
}
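
// A minimal usage sketch that ties `BatchPredictInputConfig` and
// `BatchPredictOutputConfig` together, written against the
// `google-cloud-automl` Python client library. This is illustrative only;
// the model ID and Cloud Storage paths below are placeholders:
//
//     from google.cloud import automl_v1
//
//     prediction_client = automl_v1.PredictionServiceClient()
//     model_name = automl_v1.AutoMlClient.model_path(
//         "my-project", "us-central1", "IOD1234567890"
//     )
//     input_config = automl_v1.BatchPredictInputConfig(
//         gcs_source=automl_v1.GcsSource(
//             input_uris=["gs://my-bucket/batch/input.csv"]
//         ),
//     )
//     output_config = automl_v1.BatchPredictOutputConfig(
//         gcs_destination=automl_v1.GcsDestination(
//             output_uri_prefix="gs://my-bucket/batch/output/"
//         ),
//     )
//     # BatchPredict is a long-running operation; results land under the
//     # "prediction-<model-display-name>-<timestamp>" directory described above.
//     operation = prediction_client.batch_predict(
//         name=model_name,
//         input_config=input_config,
//         output_config=output_config,
//     )
//     operation.result()
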
// Output configuration for ModelExport Action.
message ModelExportOutputConfig {
  // The destination of the output.
  oneof destination {
    // Required. The Google Cloud Storage location where the model is to be written to.
    // This location may only be set for the following model formats:
    //   "tflite", "edgetpu_tflite", "tf_saved_model", "tf_js", "core_ml".
    //
    // Under the directory given as the destination a new one with name
    // "model-export-<model-display-name>-<timestamp-of-export-call>",
    // where timestamp is in YYYY-MM-DDThh:mm:ss.sssZ ISO-8601 format,
    // will be created. Inside it, the model and any of its supporting files
    // will be written.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = REQUIRED];
  }

  // The format in which the model must be exported. The available, and default,
  // formats depend on the problem and model type (if a given problem and type
  // combination doesn't have a format listed, it means its models are not
  // exportable):
  //
  // *  For Image Classification mobile-low-latency-1, mobile-versatile-1,
  //    mobile-high-accuracy-1:
  //      "tflite" (default), "edgetpu_tflite", "tf_saved_model", "tf_js",
  //      "docker".
  //
  // *  For Image Classification mobile-core-ml-low-latency-1,
  //    mobile-core-ml-versatile-1, mobile-core-ml-high-accuracy-1:
  //      "core_ml" (default).
  //
  // *  For Image Object Detection mobile-low-latency-1, mobile-versatile-1,
  //    mobile-high-accuracy-1:
  //      "tflite", "tf_saved_model", "tf_js".
  //
  // Formats description:
  //
  // *  tflite - Used for Android mobile devices.
  // *  edgetpu_tflite - Used for [Edge TPU](https://cloud.google.com/edge-tpu/)
  //    devices.
  // *  tf_saved_model - A TensorFlow model in SavedModel format.
  // *  tf_js - A [TensorFlow.js](https://www.tensorflow.org/js) model that can
  //    be used in the browser and in Node.js using JavaScript.
  // *  docker - Used for Docker containers. Use the params field to customize
  //    the container. The container is verified to work correctly on
  //    Ubuntu 16.04 operating system. See more at
  //    [containers
  //    quickstart](https://cloud.google.com/vision/automl/docs/containers-gcs-quickstart)
  // *  core_ml - Used for iOS mobile devices.
  string model_format = 4;

  // Additional model-type and format specific parameters describing the
  // requirements for the model files to be exported; any string must be up to
  // 25000 characters long.
  //
  // *  For `docker` format:
  //      `cpu_architecture` - (string) "x86_64" (default).
  //      `gpu_architecture` - (string) "none" (default), "nvidia".
  map<string, string> params = 2;
}

// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Google Cloud Storage URIs to input files, up to 2000
  // characters long. Accepted forms:
  // * Full object path, e.g. gs://bucket/directory/object.csv
  repeated string input_uris = 1 [(google.api.field_behavior) = REQUIRED];
}

// The Google Cloud Storage location where the output is to be written to.
message GcsDestination {
  // Required. Google Cloud Storage URI to output directory, up to 2000
  // characters long.
  // Accepted forms:
  // * Prefix path: gs://bucket/directory
  // The requesting user must have write permission to the bucket.
  // The directory is created if it doesn't exist.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}
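
// A minimal usage sketch for `ModelExportOutputConfig` above, written against
// the `google-cloud-automl` Python client library. Illustrative only; the
// model ID, bucket, and chosen `model_format` are placeholders and must match
// a format actually supported for the model being exported:
//
//     from google.cloud import automl_v1
//
//     client = automl_v1.AutoMlClient()
//     model_name = client.model_path("my-project", "us-central1", "ICN1234567890")
//     output_config = automl_v1.ModelExportOutputConfig(
//         gcs_destination=automl_v1.GcsDestination(
//             output_uri_prefix="gs://my-bucket/model-export/"
//         ),
//         model_format="tflite",
//     )
//     # ExportModel is a long-running operation.
//     request = automl_v1.ExportModelRequest(name=model_name, output_config=output_config)
//     operation = client.export_model(request=request)
//     operation.result()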