1// Copyright 2021 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.translation.v3beta1;
18
19import "google/api/annotations.proto";
20import "google/api/client.proto";
21import "google/api/field_behavior.proto";
22import "google/api/resource.proto";
23import "google/longrunning/operations.proto";
24import "google/protobuf/timestamp.proto";
25
26option cc_enable_arenas = true;
27option csharp_namespace = "Google.Cloud.Translate.V3Beta1";
28option go_package = "cloud.google.com/go/translation/apiv3beta1/translationpb;translationpb";
29option java_multiple_files = true;
30option java_outer_classname = "TranslationServiceProto";
31option java_package = "com.google.cloud.translate.v3beta1";
32option php_namespace = "Google\\Cloud\\Translate\\V3beta1";
33option ruby_package = "Google::Cloud::Translate::V3beta1";
34
35// Proto file for the Cloud Translation API (v3beta1).
36
// Provides natural language translation operations.
service TranslationService {
  option (google.api.default_host) = "translate.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-translation";

  // Translates the supplied input text and returns the translated text.
  rpc TranslateText(TranslateTextRequest) returns (TranslateTextResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:translateText"
      body: "*"
      additional_bindings {
        post: "/v3beta1/{parent=projects/*}:translateText"
        body: "*"
      }
    };
  }

  // Detects the language of the text contained in a request.
  rpc DetectLanguage(DetectLanguageRequest) returns (DetectLanguageResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:detectLanguage"
      body: "*"
      additional_bindings {
        post: "/v3beta1/{parent=projects/*}:detectLanguage"
        body: "*"
      }
    };
    option (google.api.method_signature) = "parent,model,mime_type";
  }

  // Returns the list of languages supported for translation.
  rpc GetSupportedLanguages(GetSupportedLanguagesRequest)
      returns (SupportedLanguages) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/supportedLanguages"
      additional_bindings {
        get: "/v3beta1/{parent=projects/*}/supportedLanguages"
      }
    };
    option (google.api.method_signature) = "parent,display_language_code,model";
  }

  // Translates documents in synchronous mode.
  rpc TranslateDocument(TranslateDocumentRequest)
      returns (TranslateDocumentResponse) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:translateDocument"
      body: "*"
    };
  }

  // Translates a large volume of text in asynchronous batch mode.
  // Output is provided in real time while the inputs are being processed.
  // If the caller cancels a request, the partial results may still be
  // available at the specified output location (for any single input file,
  // the result is all or nothing).
  //
  // This call returns immediately; use google.longrunning.Operation.name to
  // poll the status of the call.
  rpc BatchTranslateText(BatchTranslateTextRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:batchTranslateText"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateResponse"
      metadata_type: "BatchTranslateMetadata"
    };
  }

  // Translates a large volume of documents in asynchronous batch mode.
  // Output is provided in real time while the inputs are being processed.
  // If the caller cancels a request, the partial results may still be
  // available at the specified output location (for any single input file,
  // the result is all or nothing).
  //
  // This call returns immediately; use google.longrunning.Operation.name to
  // poll the status of the call.
  rpc BatchTranslateDocument(BatchTranslateDocumentRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}:batchTranslateDocument"
      body: "*"
    };
    // Adjacent string literals are concatenated, so this is a single
    // five-field signature.
    option (google.api.method_signature) =
        "parent,source_language_code,target_language_codes,"
        "input_configs,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateDocumentResponse"
      metadata_type: "BatchTranslateDocumentMetadata"
    };
  }

  // Creates a glossary and returns the long-running operation.
  // Returns NOT_FOUND if the project doesn't exist.
  rpc CreateGlossary(CreateGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
      body: "glossary"
    };
    option (google.api.method_signature) = "parent,glossary";
    option (google.longrunning.operation_info) = {
      response_type: "Glossary"
      metadata_type: "CreateGlossaryMetadata"
    };
  }

  // Lists the glossaries in a project.
  // Returns NOT_FOUND if the project doesn't exist.
  rpc ListGlossaries(ListGlossariesRequest) returns (ListGlossariesResponse) {
    option (google.api.http) = {
      get: "/v3beta1/{parent=projects/*/locations/*}/glossaries"
    };
    option (google.api.method_signature) = "parent,filter";
  }

  // Gets a glossary.
  // Returns NOT_FOUND if the glossary doesn't exist.
  rpc GetGlossary(GetGlossaryRequest) returns (Glossary) {
    option (google.api.http) = {
      get: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Deletes a glossary, or cancels glossary construction if the glossary
  // isn't created yet.
  // Returns NOT_FOUND if the glossary doesn't exist.
  rpc DeleteGlossary(DeleteGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v3beta1/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "DeleteGlossaryResponse"
      metadata_type: "DeleteGlossaryMetadata"
    };
  }
}
178
// Selects the glossary to use for a specific target language and defines the
// options that control how that glossary is applied.
message TranslateTextGlossaryConfig {
  // Required. The glossary used for this translation, in the format
  // `projects/*/locations/*/glossaries/*`.
  string glossary = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. When true, glossary matching is case-insensitive.
  // Defaults to false if missing.
  bool ignore_case = 2 [(google.api.field_behavior) = OPTIONAL];
}
190
// The request message for synchronous translation.
message TranslateTextRequest {
  // Required. The content of the input in string format.
  // The max length of this field is 1024, and we recommend that the total
  // content be less than 30k codepoints.
  // For larger text, use BatchTranslateText.
  repeated string contents = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The format of the source text, for example, "text/html" or
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The BCP-47 language code of the input text, if known; for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // Language Support. If the source language isn't specified, the API
  // attempts to identify it automatically and returns the source language
  // within the response.
  string source_language_code = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The BCP-47 language code to use for translation of the input
  // text. Must be one of the language codes listed in Language Support.
  string target_language_code = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global`
  // or `projects/{project-number-or-id}`.
  //
  // A non-global location is required for requests that use AutoML models or
  // custom glossaries.
  //
  // Models and glossaries must be within the same region (have the same
  // location-id); otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 8 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on the model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // For global (non-regionalized) requests, use `location-id` `global`.
  // For example,
  // `projects/{project-number-or-id}/locations/global/models/general/nmt`.
  //
  // If not provided, the default Google model (NMT) will be used.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be within the same
  // region (have the same location-id) as the model; otherwise an
  // INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters (Unicode
  // codepoints) and can only contain lowercase letters, numeric characters,
  // underscores and dashes. International characters are allowed. Label
  // values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/labels for more information.
  map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];
}
269
// The response message for synchronous translation, returned by the
// `TranslateText` RPC.
message TranslateTextResponse {
  // Text translation responses with no glossary applied.
  // This field has the same length as
  // [`contents`][google.cloud.translation.v3beta1.TranslateTextRequest.contents].
  repeated Translation translations = 1;

  // Text translation responses if a glossary is provided in the request.
  // This can be the same as
  // [`translations`][google.cloud.translation.v3beta1.TranslateTextResponse.translations]
  // if no terms apply. This field has the same length as
  // [`contents`][google.cloud.translation.v3beta1.TranslateTextRequest.contents].
  repeated Translation glossary_translations = 3;
}
283
// A single translation response.
message Translation {
  // Text translated into the target language.
  // If an error occurs during translation, this field might be excluded from
  // the response.
  string translated_text = 1;

  // Only present when `model` is present in the request.
  // `model` here is normalized to have a project number.
  //
  // For example:
  // If the `model` requested in TranslateTextRequest is
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 2;

  // The BCP-47 language code of the source text in the initial request,
  // detected automatically, if no source language was passed within the
  // initial request. If the source language was passed, auto-detection of the
  // language does not occur and this field is empty.
  string detected_language_code = 4;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 3;
}
310
// The request message for language detection.
message DetectLanguageRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}` or
  // `projects/{project-number-or-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global`
  // or `projects/{project-number-or-id}`.
  //
  // Only models within the same region (with the same location-id) can be
  // used; otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 5 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language detection model to be used.
  //
  // Format:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/{model-id}`
  //
  // Only one language detection model is currently supported:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/default`.
  //
  // If not specified, the default model is used.
  string model = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The source of the document from which to detect the language.
  oneof source {
    // The content of the input, stored as a string.
    string content = 1;
  }

  // Optional. The format of the source text, for example, "text/html" or
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters (Unicode
  // codepoints) and can only contain lowercase letters, numeric characters,
  // underscores and dashes. International characters are allowed. Label
  // values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/labels for more information.
  map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];
}
362
// A single detected language, as listed in
// [`languages`][google.cloud.translation.v3beta1.DetectLanguageResponse.languages].
message DetectedLanguage {
  // The BCP-47 language code of source content in the request, detected
  // automatically.
  string language_code = 1;

  // The confidence of the detection result for this language.
  float confidence = 2;
}
372
// The response message for language detection.
message DetectLanguageResponse {
  // The detected languages, sorted by detection confidence in descending
  // order, so the most probable language comes first.
  repeated DetectedLanguage languages = 1;
}
379
// The request message for discovering supported languages.
message GetSupportedLanguagesRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global`
  // or `projects/{project-number-or-id}`.
  //
  // A non-global location is required for AutoML models.
  //
  // Only models within the same region (with the same location-id) can be
  // used; otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language in which to return localized, human readable
  // names of supported languages. If missing, display names are not returned
  // in the response.
  string display_language_code = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The model whose supported languages should be returned.
  //
  // The format depends on the model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // Returns languages supported by the specified model.
  // If missing, we get supported languages of the Google general NMT model.
  string model = 2 [(google.api.field_behavior) = OPTIONAL];
}
422
// The response message for discovering supported languages.
message SupportedLanguages {
  // The supported language responses: one entry for each language that the
  // Translation API supports.
  repeated SupportedLanguage languages = 1;
}
429
// A single supported language response, carrying the information related to
// one supported language.
message SupportedLanguage {
  // Supported language code, generally consisting of its ISO 639-1
  // identifier; for example, 'en' or 'ja'. In certain cases, BCP-47 codes
  // including language and region identifiers are returned (for example,
  // 'zh-TW' and 'zh-CN').
  string language_code = 1;

  // Human readable name of the language, localized in the display language
  // specified in the request.
  string display_name = 2;

  // Whether the language can be used as a source language.
  bool support_source = 3;

  // Whether the language can be used as a target language.
  bool support_target = 4;
}
449
// The Google Cloud Storage location of the input content.
message GcsSource {
  // Required. URI of the source data, for example
  // `gs://my_bucket/my_object`.
  string input_uri = 1 [(google.api.field_behavior) = REQUIRED];
}
455
// Input configuration for BatchTranslateText request.
message InputConfig {
  // Optional. Can be "text/plain" or "text/html".
  // For `.tsv`, "text/html" is used if mime_type is missing.
  // For `.html`, this field must be "text/html" or empty.
  // For `.txt`, this field must be "text/plain" or empty.
  string mime_type = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.tsv`) or a wildcard (for example,
    // `gs://translation-test/*`).
    //
    // If a file extension is `.tsv`, the file can contain either one or two
    // columns. The first column (optional) is the id of the text request. If
    // the first column is missing, we use the row number (0-based) from the
    // input file as the ID in the output file. The second column is the
    // actual text to be translated. We recommend each row be <= 10K Unicode
    // codepoints, otherwise an error might be returned.
    // Note that the input tsv must be RFC 4180 compliant.
    //
    // You could use https://github.com/Clever/csvlint to check potential
    // formatting errors in your tsv file.
    // csvlint --delimiter='\t' your_input_file.tsv
    //
    // The other supported file extensions are `.txt` and `.html`; such a file
    // is treated as a single large chunk of text.
    GcsSource gcs_source = 2;
  }
}
487
// The Google Cloud Storage location for the output content.
message GcsDestination {
  // Required. There must be no files under 'output_uri_prefix'.
  // 'output_uri_prefix' must start with "gs://" and end with "/"; otherwise
  // an INVALID_ARGUMENT (400) error is returned.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}
495
// Output configuration for BatchTranslateText request.
message OutputConfig {
  // Required. The destination of output.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input file (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateTextRequest).
    //
    // Output files (tsv) generated are compliant with RFC 4180 except that
    // record delimiters are '\n' instead of '\r\n'. We don't provide any way to
    // change record delimiters.
    //
    // While the input files are being processed, we write/update an index file
    // 'index.csv' under 'output_uri_prefix' (for example,
    // gs://translation-test/index.csv). The index file is generated/updated as
    // new files are being translated. The format is:
    //
    // input_file,target_language_code,translations_file,errors_file,
    // glossary_translations_file,glossary_errors_file
    //
    // input_file is one file we matched using gcs_source.input_uri.
    // target_language_code is provided in the request.
    // translations_file contains the translations. (details provided below)
    // errors_file contains the errors during processing of the file. (details
    // below). Both translations_file and errors_file could be empty
    // strings if we have no content to output.
    // glossary_translations_file and glossary_errors_file are always empty
    // strings if the input_file is tsv. They could also be empty if we have no
    // content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in input_file to be
    // processed and ready to be consumed (that is, no partial output file is
    // written).
    //
    // Since index.csv is updated continuously during the process, please make
    // sure there is no custom retention policy applied on the output bucket
    // that may prevent the file from being updated.
    // (https://cloud.google.com/storage/docs/bucket-lock?hl=en#retention-policy)
    //
    // The format of translations_file (for target language code 'trg') is:
    // `gs://translation_test/a_b_c_'trg'_translations.[extension]`
    //
    // If the input file extension is tsv, the output has the following
    // columns:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: translation without applying a glossary. Empty string if there
    // is an error.
    // Column 4 (only present if a glossary is provided in the request):
    // translation after applying the glossary. Empty string if there is an
    // error applying the glossary. Could be same string as column 3 if there is
    // no glossary applied.
    //
    // If input file extension is a txt or html, the translation is directly
    // written to the output file. If glossary is requested, a separate
    // glossary_translations_file has format of
    // gs://translation_test/a_b_c_'trg'_glossary_translations.[extension]
    //
    // The format of errors file (for target language code 'trg') is:
    // gs://translation_test/a_b_c_'trg'_errors.[extension]
    //
    // If the input file extension is tsv, errors_file contains the following:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: Error detail for the translation. Could be empty.
    // Column 4 (only present if a glossary is provided in the request):
    // Error when applying the glossary.
    //
    // If the input file extension is txt or html, glossary_errors_file will be
    // generated that contains error details. glossary_errors_file has format of
    // gs://translation_test/a_b_c_'trg'_glossary_errors.[extension]
    GcsDestination gcs_destination = 1;
  }
}
574
// The input configuration of a document translation request.
message DocumentInputConfig {
  // Specifies the source for the document's content.
  // The input file size should be <= 20MB for
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  // The input file size should be <= 20MB and the maximum page limit is 20 for
  // - application/pdf
  oneof source {
    // The document's content, represented as a stream of bytes.
    bytes content = 1;

    // Google Cloud Storage location of the document. This must be a single
    // file. For example: gs://example_bucket/example_file.pdf
    GcsSource gcs_source = 2;
  }

  // Specifies the input document's mime_type.
  //
  // If not specified, it will be determined using the file extension for
  // files provided through gcs_source. For a file provided through bytes
  // content, the mime_type must be provided.
  // Currently supported mime types are:
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 4;
}
605
// A document translation request output config.
message DocumentOutputConfig {
  // A URI destination for the translated document.
  // It is optional to provide a destination. If provided the results from
  // TranslateDocument will be stored in the destination.
  // Whether a destination is provided or not, the translated documents will be
  // returned within TranslateDocumentResponse.document_translation and
  // TranslateDocumentResponse.glossary_document_translation.
  oneof destination {
    // Optional. Google Cloud Storage destination for the translation output,
    // e.g., `gs://my_bucket/my_directory/`.
    //
    // The destination directory provided does not have to be empty, but the
    // bucket must exist. If a file with the same name as the output file
    // already exists in the destination an error will be returned.
    //
    // For a DocumentInputConfig.content provided document, the output file
    // will have the name "output_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    //
    // For a DocumentInputConfig.gcs_source provided document, the output file
    // will have a name according to its URI. For example: an input file with
    // URI: "gs://a/b/c.[extension]" stored in a gcs_destination bucket with
    // name "my_bucket" will have an output URI:
    // "gs://my_bucket/a_b_c_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    //
    // If the document was directly provided through the request, then the
    // output document will have the format:
    // "gs://my_bucket/translated_document_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    // If a glossary was provided, then the output URI for the glossary
    // translation will be equal to the default output URI but have
    // `glossary_translations` instead of `translations`. For the previous
    // example, its glossary URI would be:
    // "gs://my_bucket/a_b_c_[trg]_glossary_translations.[ext]".
    //
    // Thus the max number of output files will be 2 (Translated document,
    // Glossary translated document).
    //
    // Callers should expect no partial outputs. If there is any error during
    // document translation, no output will be stored in the Cloud Storage
    // bucket.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Specifies the translated document's mime_type.
  // If not specified, the translated file's mime type will be the same as the
  // input file's mime type.
  // Currently only support the output mime type to be the same as input mime
  // type.
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];
}
672
// A document translation request.
message TranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global`.
  //
  // Non-global location is required for requests using AutoML models or custom
  // glossaries.
  //
  // Models and glossaries must be within the same region (have the same
  // location-id), otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    // Same Location reference the other request messages in this file put on
    // their `parent` field.
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The BCP-47 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // Language Support. If the source language isn't specified, the API attempts
  // to identify the source language automatically and returns the source
  // language within the response. Source language must be specified if the
  // request contains a glossary or a custom model.
  string source_language_code = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The BCP-47 language code to use for translation of the input
  // document, set to one of the language codes listed in Language Support.
  string target_language_code = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  DocumentInputConfig document_input_config = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Output configurations.
  // Defines if the output file should be stored within Cloud Storage as well
  // as the desired output format. If not provided the translated file will
  // only be returned through a byte-stream and its output mime type will be
  // the same as the input file's mime type.
  DocumentOutputConfig document_output_config = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // If not provided, the default Google model (NMT) will be used for
  // translation.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be within the same
  // region (have the same location-id) as the model, otherwise an
  // INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters (Unicode
  // codepoints), can only contain lowercase letters, numeric characters,
  // underscores and dashes. International characters are allowed. Label values
  // are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];
}
744
// A translated document message.
message DocumentTranslation {
  // The array of translated documents, each held as a byte stream. It is
  // expected to be of size 1 for now. We may produce multiple translated
  // documents in the future for other types of file formats.
  repeated bytes byte_stream_outputs = 1;

  // The translated document's mime type.
  string mime_type = 2;

  // The detected language for the input document.
  // If the user did not provide the source language for the input document,
  // this field will have the language code automatically detected. If the
  // source language was passed, auto-detection of the language does not occur
  // and this field is empty.
  string detected_language_code = 3;
}
762
// A translated document response message.
message TranslateDocumentResponse {
  // Translated document.
  DocumentTranslation document_translation = 1;

  // The document's translation output if a glossary is provided in the request.
  // This can be the same as [TranslateDocumentResponse.document_translation]
  // if no glossary terms apply.
  DocumentTranslation glossary_document_translation = 2;

  // Only present when `model` is present in the request.
  // `model` is normalized to have a project number.
  //
  // For example:
  // If the `model` field in TranslateDocumentRequest is:
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 3;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 4;
}
786
// The batch translation request.
message BatchTranslateTextRequest {
  // Required. Location to make a call. Must refer to a caller's project.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // The `global` location is not supported for batch translation.
  //
  // Only AutoML Translation models or glossaries within the same region (have
  // the same location-id) can be used, otherwise an INVALID_ARGUMENT (400)
  // error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. Source language code.
  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Specify up to 10 language codes here.
  repeated string target_language_codes = 3
      [(google.api.field_behavior) = REQUIRED];

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is model name. Value can be a built-in general model,
  // or an AutoML Translation model.
  //
  // The value format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // If the map is empty or a specific model is not requested for a language
  // pair, then the default Google model (NMT) is used.
  map<string, string> models = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. Input configurations.
  // The total number of files matched should be <= 100.
  // The total content size should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated InputConfig input_configs = 5
      [(google.api.field_behavior) = REQUIRED];

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  OutputConfig output_config = 6 [(google.api.field_behavior) = REQUIRED];

  // Optional. Glossaries to be applied for translation.
  // It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7
      [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field number 8 is not used in this message. If it belonged
  // to a removed field it should be declared `reserved` -- confirm.

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 9 [(google.api.field_behavior) = OPTIONAL];
}
856
// State metadata for the batch translation operation.
message BatchTranslateMetadata {
  // State of the job.
  enum State {
    // Invalid. This is the zero (default) value of the enum.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item was successfully
    // processed.
    SUCCEEDED = 2;

    // The batch is done and no item was successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called the
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Number of successfully translated characters so far (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process so far (Unicode
  // codepoints).
  int64 failed_characters = 3;

  // Total number of characters (Unicode codepoints).
  // This is the total number of codepoints from input files times the number of
  // target languages. It appears here shortly after the call is submitted.
  int64 total_characters = 4;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 5;
}
902
// Character counts and timestamps for a batch text translation.
//
// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateText if at least one sentence is translated
// successfully.
message BatchTranslateResponse {
  // Total number of characters (Unicode codepoints).
  int64 total_characters = 1;

  // Number of successfully translated characters (Unicode codepoints).
  int64 translated_characters = 2;

  // Number of characters that have failed to process (Unicode codepoints).
  int64 failed_characters = 3;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 4;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 5;
}
925
// Input configuration for glossaries.
message GlossaryInputConfig {
  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location of glossary data.
    // File format is determined based on the filename extension. API returns
    // [google.rpc.Code.INVALID_ARGUMENT] for unsupported URIs and file
    // formats. Wildcards are not allowed. This must be a single file in one of
    // the following formats:
    //
    // For unidirectional glossaries:
    //
    // - TSV/CSV (`.tsv`/`.csv`): 2 column file, tab- or comma-separated.
    //   The first column is source text. The second column is target text.
    //   The file must not contain headers. That is, the first row is data, not
    //   column names.
    //
    // - TMX (`.tmx`): TMX file with parallel data defining source/target term
    //   pairs.
    //
    // For equivalent term sets glossaries:
    //
    // - CSV (`.csv`): Multi-column CSV file defining equivalent glossary terms
    //   in multiple languages. The format is defined for Google Translation
    //   Toolkit and documented in [Use a
    //   glossary](https://support.google.com/translatortoolkit/answer/6306379?hl=en).
    GcsSource gcs_source = 1;
  }
}
955
// Represents a glossary built from user-provided data.
message Glossary {
  option (google.api.resource) = {
    type: "translate.googleapis.com/Glossary"
    pattern: "projects/{project}/locations/{location}/glossaries/{glossary}"
  };

  // Used with unidirectional glossaries.
  message LanguageCodePair {
    // Required. The BCP-47 language code of the input text, for example,
    // "en-US". Expected to be an exact match for GlossaryTerm.language_code.
    string source_language_code = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. The BCP-47 language code for translation output, for example,
    // "zh-CN". Expected to be an exact match for GlossaryTerm.language_code.
    string target_language_code = 2 [(google.api.field_behavior) = REQUIRED];
  }

  // Used with equivalent term set glossaries.
  message LanguageCodesSet {
    // The BCP-47 language code(s) for terms defined in the glossary.
    // All entries are unique. The list contains at least two entries.
    // Expected to be an exact match for GlossaryTerm.language_code.
    repeated string language_codes = 1;
  }

  // Required. The resource name of the glossary. Glossary names have the form
  // `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Languages supported by the glossary. Exactly one of the members may be
  // set, matching the kind of glossary (unidirectional or equivalent term
  // set).
  oneof languages {
    // Used with unidirectional glossaries.
    LanguageCodePair language_pair = 3;

    // Used with equivalent term set glossaries.
    LanguageCodesSet language_codes_set = 4;
  }

  // Required. Provides examples to build the glossary from.
  // Total glossary must not exceed 10M Unicode codepoints.
  GlossaryInputConfig input_config = 5
      [(google.api.field_behavior) = REQUIRED];

  // Output only. The number of entries defined in the glossary.
  int32 entry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When CreateGlossary was called.
  google.protobuf.Timestamp submit_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. When the glossary creation was finished.
  google.protobuf.Timestamp end_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
1010
// Request message for CreateGlossary.
message CreateGlossaryRequest {
  // Required. The name of the parent location in which to create the glossary,
  // in the form `projects/{project-number-or-id}/locations/{location-id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The glossary to create.
  Glossary glossary = 2 [(google.api.field_behavior) = REQUIRED];
}
1024
// Request message for GetGlossary.
message GetGlossaryRequest {
  // Required. The name of the glossary to retrieve, in the form
  // `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "translate.googleapis.com/Glossary"
    }
  ];
}
1035
// Request message for DeleteGlossary.
message DeleteGlossaryRequest {
  // Required. The name of the glossary to delete, in the form
  // `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "translate.googleapis.com/Glossary"
    }
  ];
}
1046
// Request message for ListGlossaries.
message ListGlossariesRequest {
  // Required. The name of the parent location from which to list all of the
  // glossaries, in the form
  // `projects/{project-number-or-id}/locations/{location-id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. Requested page size. The server may return fewer glossaries than
  // requested. If unspecified, the server picks an appropriate default.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A token identifying a page of results the server should return.
  // Typically, this is the value of [ListGlossariesResponse.next_page_token]
  // returned from the previous call to `ListGlossaries` method.
  // The first page is returned if `page_token` is empty or missing.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter specifying constraints of a list operation.
  // Specify the constraint by the format of "key=value", where key must be
  // "src" or "tgt", and the value must be a valid language code.
  // For multiple restrictions, concatenate them by "AND" (uppercase only),
  // such as: "src=en-US AND tgt=zh-CN". Notice that the exact match is used
  // here, which means using 'en-US' and 'en' can lead to different results,
  // depending on the language code you used when you created the glossary.
  // For the unidirectional glossaries, the "src" and "tgt" add restrictions
  // on the source and target language code separately.
  // For the equivalent term set glossaries, the "src" and/or "tgt" add
  // restrictions on the term set.
  // For example: "src=en-US AND tgt=zh-CN" will only pick the unidirectional
  // glossaries which exactly match the source language code as "en-US" and the
  // target language code "zh-CN", but all equivalent term set glossaries which
  // contain "en-US" and "zh-CN" in their language set will be picked.
  // If missing, no filtering is performed.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}
1085
// Response message for ListGlossaries.
message ListGlossariesResponse {
  // The list of glossaries for a project.
  repeated Glossary glossaries = 1;

  // A token to retrieve the next page of results. Pass this value in the
  // [ListGlossariesRequest.page_token] field in the subsequent call to
  // `ListGlossaries` method to retrieve the next page of results.
  string next_page_token = 2;
}
1096
// Progress information for a glossary creation request.
//
// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by CreateGlossary.
message CreateGlossaryMetadata {
  // Enumerates the possible states that the creation request can be in.
  enum State {
    // Invalid. This is the zero (default) value of the enum.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully created.
    SUCCEEDED = 2;

    // Failed to create the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary creation request was successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being created.
  string name = 1;

  // The current state of the glossary creation operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}
1132
// Progress information for a glossary deletion request.
//
// Stored in the
// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
// field returned by DeleteGlossary.
message DeleteGlossaryMetadata {
  // Enumerates the possible states that the deletion request can be in.
  enum State {
    // Invalid. This is the zero (default) value of the enum.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The glossary was successfully deleted.
    SUCCEEDED = 2;

    // Failed to delete the glossary.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The glossary deletion request was successfully canceled.
    CANCELLED = 5;
  }

  // The name of the glossary that is being deleted.
  string name = 1;

  // The current state of the glossary deletion operation.
  State state = 2;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 3;
}
1168
// Result of a glossary deletion.
//
// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by DeleteGlossary.
message DeleteGlossaryResponse {
  // The name of the deleted glossary.
  string name = 1;

  // The time when the operation was submitted to the server.
  google.protobuf.Timestamp submit_time = 2;

  // The time when the glossary deletion is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 3;
}
1184
// The BatchTranslateDocument request.
message BatchTranslateDocumentRequest {
  // Required. Location to make a regional call.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // The `global` location is not supported for batch translation.
  //
  // Only AutoML Translation models or glossaries within the same region (have
  // the same location-id) can be used, otherwise an INVALID_ARGUMENT (400)
  // error is returned.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Required. The BCP-47 language code of the input document if known, for
  // example, "en-US" or "sr-Latn". Supported language codes are listed in
  // Language Support (https://cloud.google.com/translate/docs/languages).
  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The BCP-47 language code to use for translation of the input
  // document. Specify up to 10 language codes here.
  repeated string target_language_codes = 3
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input configurations.
  // The total number of files matched should be <= 100.
  // The total content size to translate should be <= 100M Unicode codepoints.
  // The files must use UTF-8 encoding.
  repeated BatchDocumentInputConfig input_configs = 4
      [(google.api.field_behavior) = REQUIRED];

  // Required. Output configuration.
  // If 2 input configs match to the same file (that is, same input path),
  // we don't generate output for duplicate inputs.
  BatchDocumentOutputConfig output_config = 5
      [(google.api.field_behavior) = REQUIRED];

  // Optional. The models to use for translation. Map's key is target language
  // code. Map's value is the model name. Value can be a built-in general model,
  // or an AutoML Translation model.
  //
  // The value format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // If the map is empty or a specific model is not requested for a language
  // pair, then the default Google model (NMT) is used.
  map<string, string> models = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossaries to be applied. It's keyed by target language code.
  map<string, TranslateTextGlossaryConfig> glossaries = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. File format conversion map to be applied to all input files.
  // Map's key is the original mime_type. Map's value is the target mime_type of
  // translated documents.
  //
  // Supported file format conversions include:
  //
  // - `application/pdf` to
  //   `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
  //
  // If nothing is specified, output files will be in the same format as the
  // original file.
  map<string, string> format_conversions = 8
      [(google.api.field_behavior) = OPTIONAL];
}
1260
// Input configuration for BatchTranslateDocument request.
message BatchDocumentInputConfig {
  // Specify the input.
  oneof source {
    // Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.docx`) or a wildcard (for example,
    // `gs://translation-test/*`).
    //
    // File mime type is determined based on extension. Supported mime types
    // include:
    //
    // - `pdf`, application/pdf
    // - `docx`,
    //   application/vnd.openxmlformats-officedocument.wordprocessingml.document
    // - `pptx`,
    //   application/vnd.openxmlformats-officedocument.presentationml.presentation
    // - `xlsx`,
    //   application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
    //
    // The max file size to support for `.docx`, `.pptx` and `.xlsx` is 100MB.
    // The max file size to support for `.pdf` is 1GB and the max page limit is
    // 1000 pages.
    // The max file size to support for all input documents is 1GB.
    GcsSource gcs_source = 1;
  }
}
1287
// Output configuration for BatchTranslateDocument request.
message BatchDocumentOutputConfig {
  // The destination of output. The destination directory provided must exist
  // and be empty.
  oneof destination {
    // NOTE(review): the example paths below mix the buckets
    // `gs://translation_output` and `gs://translation_test`, and the
    // `glossary_error_output` example ends in `_glossary_translation.txt`
    // rather than an errors-style name -- confirm against the service's
    // actual output naming.

    // Google Cloud Storage destination for output content.
    // For every single input document (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateDocumentRequest).
    //
    // While the input documents are being processed, we write/update an index
    // file `index.csv` under `gcs_destination.output_uri_prefix` (for example,
    // gs://translation_output/index.csv). The index file is generated/updated
    // as new files are being translated. The format is:
    //
    // input_document,target_language_code,translation_output,error_output,
    // glossary_translation_output,glossary_error_output
    //
    // `input_document` is one file we matched using gcs_source.input_uri.
    // `target_language_code` is provided in the request.
    // `translation_output` contains the translations. (details provided below)
    // `error_output` contains the error message during processing of the file.
    // Both `translation_output` and `error_output` could be empty strings if we
    // have no content to output.
    // `glossary_translation_output` and `glossary_error_output` are the
    // translated output/error when we apply glossaries. They could also be
    // empty if we have no content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in `input_document`
    // to be processed and ready to be consumed (that is, no partial output file
    // is written).
    //
    // Since index.csv is continually updated during the process, please make
    // sure there is no custom retention policy applied on the output bucket
    // that may prevent the file from being updated.
    // (https://cloud.google.com/storage/docs/bucket-lock?hl=en#retention-policy)
    //
    // The naming format of translation output files follows (for target
    // language code [trg]):
    //
    // - `translation_output`:
    //   gs://translation_output/a_b_c_[trg]_translation.[extension]
    // - `glossary_translation_output`:
    //   gs://translation_test/a_b_c_[trg]_glossary_translation.[extension]
    //
    // The output document will maintain the same file format as the input
    // document.
    //
    // The naming format of error output files follows (for target language code
    // [trg]):
    //
    // - `error_output`: gs://translation_test/a_b_c_[trg]_errors.txt
    // - `glossary_error_output`:
    //   gs://translation_test/a_b_c_[trg]_glossary_translation.txt
    //
    // The error output is a txt file containing error details.
    GcsDestination gcs_destination = 1;
  }
}
1341
// Page and character counts and timestamps for a batch document translation.
//
// Stored in the
// [google.longrunning.Operation.response][google.longrunning.Operation.response]
// field returned by BatchTranslateDocument if at least one document is
// translated successfully.
message BatchTranslateDocumentResponse {
  // Total number of pages to translate in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 total_pages = 1;

  // Number of successfully translated pages in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 2;

  // Number of pages that failed to process in all documents. Documents without
  // clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 3;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX).
  int64 total_billable_pages = 4;

  // Total number of characters (Unicode codepoints) in all documents.
  int64 total_characters = 5;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents.
  int64 translated_characters = 6;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents.
  int64 failed_characters = 7;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition, such as XLSX.
  int64 total_billable_characters = 8;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 9;

  // The time when the operation is finished and
  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
  // set to true.
  google.protobuf.Timestamp end_time = 10;
}
1386
// State metadata for the batch document translation operation.
message BatchTranslateDocumentMetadata {
  // State of the job.
  enum State {
    // Invalid. This is the zero (default) value of the enum.
    STATE_UNSPECIFIED = 0;

    // Request is being processed.
    RUNNING = 1;

    // The batch is processed, and at least one item was successfully processed.
    SUCCEEDED = 2;

    // The batch is done and no item was successfully processed.
    FAILED = 3;

    // Request is in the process of being canceled after caller invoked
    // longrunning.Operations.CancelOperation on the request id.
    CANCELLING = 4;

    // The batch is done after the user has called the
    // longrunning.Operations.CancelOperation. Any records processed before the
    // cancel command are output as specified in the request.
    CANCELLED = 5;
  }

  // The state of the operation.
  State state = 1;

  // Total number of pages to translate in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 total_pages = 2;

  // Number of successfully translated pages in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 translated_pages = 3;

  // Number of pages that failed to process in all documents so far. Documents
  // without clear page definition (such as XLSX) are not counted.
  int64 failed_pages = 4;

  // Number of billable pages in documents with clear page definition (such as
  // PDF, DOCX, PPTX) so far.
  int64 total_billable_pages = 5;

  // Total number of characters (Unicode codepoints) in all documents so far.
  int64 total_characters = 6;

  // Number of successfully translated characters (Unicode codepoints) in all
  // documents so far.
  int64 translated_characters = 7;

  // Number of characters that have failed to process (Unicode codepoints) in
  // all documents so far.
  int64 failed_characters = 8;

  // Number of billable characters (Unicode codepoints) in documents without
  // clear page definition (such as XLSX) so far.
  int64 total_billable_characters = 9;

  // Time when the operation was submitted.
  google.protobuf.Timestamp submit_time = 10;
}
1450