// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.translation.v3;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Translate.V3";
option go_package = "cloud.google.com/go/translate/apiv3/translatepb;translatepb";
option java_multiple_files = true;
option java_outer_classname = "TranslationServiceProto";
option java_package = "com.google.cloud.translate.v3";
option php_namespace = "Google\\Cloud\\Translate\\V3";
option ruby_package = "Google::Cloud::Translate::V3";

// Proto file for the Cloud Translation API (v3 GA).

// Provides natural language translation operations.
service TranslationService {
  option (google.api.default_host) = "translate.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-translation";

  // Translates input text and returns translated text.
  rpc TranslateText(TranslateTextRequest) returns (TranslateTextResponse) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:translateText"
      body: "*"
      additional_bindings {
        post: "/v3/{parent=projects/*}:translateText"
        body: "*"
      }
    };
    option (google.api.method_signature) =
        "parent,target_language_code,contents";
    option (google.api.method_signature) =
        "parent,model,mime_type,source_language_code,target_language_code,contents";
  }

  // Detects the language of text within a request.
  rpc DetectLanguage(DetectLanguageRequest) returns (DetectLanguageResponse) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:detectLanguage"
      body: "*"
      additional_bindings {
        post: "/v3/{parent=projects/*}:detectLanguage"
        body: "*"
      }
    };
    option (google.api.method_signature) = "parent,model,mime_type,content";
  }

  // Returns a list of supported languages for translation.
  rpc GetSupportedLanguages(GetSupportedLanguagesRequest)
      returns (SupportedLanguages) {
    option (google.api.http) = {
      get: "/v3/{parent=projects/*/locations/*}/supportedLanguages"
      additional_bindings { get: "/v3/{parent=projects/*}/supportedLanguages" }
    };
    option (google.api.method_signature) = "parent,model,display_language_code";
  }

  // Translates documents in synchronous mode.
  rpc TranslateDocument(TranslateDocumentRequest)
      returns (TranslateDocumentResponse) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:translateDocument"
      body: "*"
    };
  }

  // Translates a large volume of text in asynchronous batch mode.
  // This function provides real-time output as the inputs are being processed.
  // If caller cancels a request, the partial results (for an input file, it's
  // all or nothing) may still be available on the specified output location.
  //
  // This call returns immediately and you can
  // use google.longrunning.Operation.name to poll the status of the call.
  // The operation's response type is BatchTranslateResponse and its metadata
  // type is BatchTranslateMetadata.
  rpc BatchTranslateText(BatchTranslateTextRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:batchTranslateText"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateResponse"
      metadata_type: "BatchTranslateMetadata"
    };
  }

  // Translates a large volume of documents in asynchronous batch mode.
  // This function provides real-time output as the inputs are being processed.
  // If caller cancels a request, the partial results (for an input file, it's
  // all or nothing) may still be available on the specified output location.
  //
  // This call returns immediately and you can use
  // google.longrunning.Operation.name to poll the status of the call.
  // The operation's response type is BatchTranslateDocumentResponse and its
  // metadata type is BatchTranslateDocumentMetadata.
  rpc BatchTranslateDocument(BatchTranslateDocumentRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}:batchTranslateDocument"
      body: "*"
    };
    option (google.api.method_signature) =
        "parent,source_language_code,target_language_codes,input_configs,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "BatchTranslateDocumentResponse"
      metadata_type: "BatchTranslateDocumentMetadata"
    };
  }

  // Creates a glossary and returns the long-running operation. Returns
  // NOT_FOUND, if the project doesn't exist.
  // The operation's response type is Glossary and its metadata type is
  // CreateGlossaryMetadata.
  rpc CreateGlossary(CreateGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v3/{parent=projects/*/locations/*}/glossaries"
      body: "glossary"
    };
    option (google.api.method_signature) = "parent,glossary";
    option (google.longrunning.operation_info) = {
      response_type: "Glossary"
      metadata_type: "CreateGlossaryMetadata"
    };
  }

  // Lists glossaries in a project. Returns NOT_FOUND, if the project doesn't
  // exist.
  rpc ListGlossaries(ListGlossariesRequest) returns (ListGlossariesResponse) {
    option (google.api.http) = {
      get: "/v3/{parent=projects/*/locations/*}/glossaries"
    };
    option (google.api.method_signature) = "parent";
  }

  // Gets a glossary. Returns NOT_FOUND, if the glossary doesn't
  // exist.
  rpc GetGlossary(GetGlossaryRequest) returns (Glossary) {
    option (google.api.http) = {
      get: "/v3/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Deletes a glossary, or cancels glossary construction
  // if the glossary isn't created yet.
  // Returns NOT_FOUND, if the glossary doesn't exist.
  // The operation's response type is DeleteGlossaryResponse and its metadata
  // type is DeleteGlossaryMetadata.
  rpc DeleteGlossary(DeleteGlossaryRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v3/{name=projects/*/locations/*/glossaries/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "DeleteGlossaryResponse"
      metadata_type: "DeleteGlossaryMetadata"
    };
  }
}

// Configures which glossary should be used for a specific target language,
// and defines options for applying that glossary.
message TranslateTextGlossaryConfig {
  // Required. The `glossary` to be applied for this translation.
  //
  // The format depends on the glossary:
  //
  // - User-provided custom glossary:
  //   `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`
  string glossary = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Indicates whether glossary matching is case insensitive. The
  // default value is `false` if missing.
  bool ignore_case = 2 [(google.api.field_behavior) = OPTIONAL];
}

// The request message for synchronous translation.
message TranslateTextRequest {
  // Required. The content of the input in string format.
  // We recommend the total content be less than 30,000 codepoints. The max
  // length of this field is 1024. Use BatchTranslateText for larger text.
  repeated string contents = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The ISO-639 language code of the input text if
  // known, for example, "en-US" or "sr-Latn". Supported language codes are
  // listed in Language Support. If the source language isn't specified, the API
  // attempts to identify the source language automatically and returns the
  // source language within the response.
  string source_language_code = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The ISO-639 language code to use for translation of the input
  // text, set to one of the language codes listed in Language Support.
  string target_language_code = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for requests using AutoML models or
  // custom glossaries.
  //
  // Models and glossaries must be within the same region (have same
  // location-id), otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 8 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The `model` type requested for this translation.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // For global (non-regionalized) requests, use `location-id` `global`.
  // For example,
  // `projects/{project-number-or-id}/locations/global/models/general/nmt`.
  //
  // If not provided, the default Google model (NMT) will be used.
  string model = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Glossary to be applied. The glossary must be
  // within the same region (have the same location-id) as the model, otherwise
  // an INVALID_ARGUMENT (400) error is returned.
  TranslateTextGlossaryConfig glossary_config = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];
}

// The response message for synchronous text translation, returned by
// TranslateText.
message TranslateTextResponse {
  // Text translation responses with no glossary applied.
  // This field has the same length as
  // [`contents`][google.cloud.translation.v3.TranslateTextRequest.contents].
  repeated Translation translations = 1;

  // Text translation responses if a glossary is provided in the request.
  // This can be the same as
  // [`translations`][google.cloud.translation.v3.TranslateTextResponse.translations]
  // if no terms apply. This field has the same length as
  // [`contents`][google.cloud.translation.v3.TranslateTextRequest.contents].
  repeated Translation glossary_translations = 3;
}

// A single translation response.
message Translation {
  // Text translated into the target language.
  // If an error occurs during translation, this field might be excluded from
  // the response.
  string translated_text = 1;

  // Only present when `model` is present in the request.
  // `model` here is normalized to have project number.
  //
  // For example:
  // If the `model` requested in TranslateTextRequest is
  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
  // `model` here would be normalized to
  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
  string model = 2;

  // The ISO-639 language code of source text in the initial request, detected
  // automatically, if no source language was passed within the initial
  // request. If the source language was passed, auto-detection of the language
  // does not occur and this field is empty.
  string detected_language_code = 4;

  // The `glossary_config` used for this translation.
  TranslateTextGlossaryConfig glossary_config = 3;
}

// The request message for language detection.
message DetectLanguageRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}/locations/{location-id}` or
  // `projects/{project-number-or-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Only models within the same region (have the same location-id) can be
  // used. Otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 5 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language detection model to be used.
  //
  // Format:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/{model-id}`
  //
  // Only one language detection model is currently supported:
  // `projects/{project-number-or-id}/locations/{location-id}/models/language-detection/default`.
  //
  // If not specified, the default model is used.
  string model = 4 [(google.api.field_behavior) = OPTIONAL];

  // Required. The source of the document from which to detect the language.
  oneof source {
    // The content of the input stored as a string.
    string content = 1;
  }

  // Optional. The format of the source text, for example, "text/html",
  // "text/plain". If left blank, the MIME type defaults to "text/html".
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  //
  // See https://cloud.google.com/translate/docs/advanced/labels for more
  // information.
  map<string, string> labels = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A single detected language together with the confidence of that detection.
// Instances are returned in DetectLanguageResponse.languages.
message DetectedLanguage {
  // The ISO-639 language code of the source content in the request, detected
  // automatically.
  string language_code = 1;

  // The confidence of the detection result for this language.
  float confidence = 2;
}

// The response message for language detection.
message DetectLanguageResponse {
  // The most probable language detected by the Translation API. For each
  // request, the Translation API will always return only one result.
  repeated DetectedLanguage languages = 1;
}

// The request message for discovering supported languages.
message GetSupportedLanguagesRequest {
  // Required. Project or location to make a call. Must refer to a caller's
  // project.
  //
  // Format: `projects/{project-number-or-id}` or
  // `projects/{project-number-or-id}/locations/{location-id}`.
  //
  // For global calls, use `projects/{project-number-or-id}/locations/global` or
  // `projects/{project-number-or-id}`.
  //
  // Non-global location is required for AutoML models.
  //
  // Only models within the same region (have same location-id) can be used,
  // otherwise an INVALID_ARGUMENT (400) error is returned.
  string parent = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Optional. The language to use to return localized, human readable names
  // of supported languages. If missing, then display names are not returned
  // in a response.
  string display_language_code = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Get supported languages of this model.
  //
  // The format depends on model type:
  //
  // - AutoML Translation models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
  //
  // - General (built-in) models:
  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
  //
  // Returns languages supported by the specified model.
  // If missing, we get supported languages of Google general NMT model.
  string model = 2 [(google.api.field_behavior) = OPTIONAL];
}

// The response message for discovering supported languages.
message SupportedLanguages {
  // A list of supported language responses. This list contains an entry
  // for each language the Translation API supports.
  repeated SupportedLanguage languages = 1;
}

// A single supported language response corresponds to information related
// to one supported language.
message SupportedLanguage {
  // Supported language code, generally consisting of its ISO 639-1
  // identifier, for example, 'en', 'ja'. In certain cases, ISO-639 codes
  // including language and region identifiers are returned (for example,
  // 'zh-TW' and 'zh-CN').
  string language_code = 1;

  // Human-readable name of the language localized in the display language
  // specified in the request.
  string display_name = 2;

  // Can be used as a source language.
  bool support_source = 3;

  // Can be used as a target language.
  bool support_target = 4;
}

// The Google Cloud Storage location for the input content.
message GcsSource {
  // Required. Source data URI. For example, `gs://my_bucket/my_object`.
  string input_uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Input configuration for BatchTranslateText request.
message InputConfig {
  // Optional. Can be "text/plain" or "text/html".
  // For `.tsv`, "text/html" is used if mime_type is missing.
  // For `.html`, this field must be "text/html" or empty.
  // For `.txt`, this field must be "text/plain" or empty.
  string mime_type = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Specify the input.
  oneof source {
    // Required. Google Cloud Storage location for the source input.
    // This can be a single file (for example,
    // `gs://translation-test/input.tsv`) or a wildcard (for example,
    // `gs://translation-test/*`). If a file extension is `.tsv`, it can
    // contain either one or two columns. The first column (optional) is the id
    // of the text request. If the first column is missing, we use the row
    // number (0-based) from the input file as the ID in the output file. The
    // second column is the actual text to be translated. We recommend each row
    // be <= 10K Unicode codepoints, otherwise an error might be returned.
    // Note that the input tsv must be RFC 4180 compliant.
    //
    // You could use https://github.com/Clever/csvlint to check potential
    // formatting errors in your tsv file.
    // csvlint --delimiter='\t' your_input_file.tsv
    //
    // The other supported file extensions are `.txt` or `.html`, which is
    // treated as a single large chunk of text.
    GcsSource gcs_source = 2;
  }
}

// The Google Cloud Storage location for the output content.
message GcsDestination {
  // Required. The bucket used in 'output_uri_prefix' must exist and there must
  // be no files under 'output_uri_prefix'. 'output_uri_prefix' must end with
  // "/" and start with "gs://". One 'output_uri_prefix' can only be used by one
  // batch translation job at a time. Otherwise an INVALID_ARGUMENT (400) error
  // is returned.
  string output_uri_prefix = 1 [(google.api.field_behavior) = REQUIRED];
}

// Output configuration for BatchTranslateText request.
message OutputConfig {
  // Required. The destination of output.
  oneof destination {
    // Google Cloud Storage destination for output content.
    // For every single input file (for example, gs://a/b/c.[extension]), we
    // generate at most 2 * n output files. (n is the # of target_language_codes
    // in the BatchTranslateTextRequest).
    //
    // Output files (tsv) generated are compliant with RFC 4180 except that
    // record delimiters are '\n' instead of '\r\n'. We don't provide any way to
    // change record delimiters.
    //
    // While the input files are being processed, we write/update an index file
    // 'index.csv' under 'output_uri_prefix' (for example,
    // gs://translation-test/index.csv). The index file is generated/updated as
    // new files are being translated. The format is:
    //
    // input_file,target_language_code,translations_file,errors_file,
    // glossary_translations_file,glossary_errors_file
    //
    // input_file is one file we matched using gcs_source.input_uri.
    // target_language_code is provided in the request.
    // translations_file contains the translations. (details provided below)
    // errors_file contains the errors during processing of the file. (details
    // below). Both translations_file and errors_file could be empty
    // strings if we have no content to output.
    // glossary_translations_file and glossary_errors_file are always empty
    // strings if the input_file is tsv. They could also be empty if we have no
    // content to output.
    //
    // Once a row is present in index.csv, the input/output matching never
    // changes. Callers should also expect all the content in input_file are
    // processed and ready to be consumed (that is, no partial output file is
    // written).
    //
    // Since index.csv is continually updated during the process, please make
    // sure there is no custom retention policy applied on the output bucket
    // that may prevent file updates.
    // (https://cloud.google.com/storage/docs/bucket-lock#retention-policy)
    //
    // The format of translations_file (for target language code 'trg') is:
    // `gs://translation_test/a_b_c_'trg'_translations.[extension]`
    //
    // If the input file extension is tsv, the output has the following
    // columns:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: translation without applying a glossary. Empty string if there
    // is an error.
    // Column 4 (only present if a glossary is provided in the request):
    // translation after applying the glossary. Empty string if there is an
    // error applying the glossary. Could be same string as column 3 if there is
    // no glossary applied.
    //
    // If input file extension is a txt or html, the translation is directly
    // written to the output file. If glossary is requested, a separate
    // glossary_translations_file has format of
    // gs://translation_test/a_b_c_'trg'_glossary_translations.[extension]
    //
    // The format of errors file (for target language code 'trg') is:
    // gs://translation_test/a_b_c_'trg'_errors.[extension]
    //
    // If the input file extension is tsv, errors_file contains the following:
    // Column 1: ID of the request provided in the input, if it's not
    // provided in the input, then the input row number is used (0-based).
    // Column 2: source sentence.
    // Column 3: Error detail for the translation. Could be empty.
    // Column 4 (only present if a glossary is provided in the request):
    // Error when applying the glossary.
    //
    // If the input file extension is txt or html, glossary_error_file will be
    // generated that contains error details. glossary_error_file has format of
    // gs://translation_test/a_b_c_'trg'_glossary_errors.[extension]
    GcsDestination gcs_destination = 1;
  }
}

// A document translation request input config.
message DocumentInputConfig {
  // Specifies the source for the document's content.
  //
  // The input file size should be <= 20MB for
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  //
  // The input file size should be <= 20MB and the maximum page limit is 20 for
  // - application/pdf
  oneof source {
    // Document's content represented as a stream of bytes.
    bytes content = 1;

    // Google Cloud Storage location. This must be a single file.
    // For example: gs://example_bucket/example_file.pdf
    GcsSource gcs_source = 2;
  }

  // Specifies the input document's mime_type.
  //
  // If not specified it will be determined using the file extension for
  // gcs_source provided files. For a file provided through bytes content the
  // mime_type must be provided.
  // Currently supported mime types are:
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 4;
}

// A document translation request output config.
message DocumentOutputConfig {
  // A URI destination for the translated document.
  // It is optional to provide a destination. If provided the results from
  // TranslateDocument will be stored in the destination.
  // Whether a destination is provided or not, the translated documents will be
  // returned within TranslateDocumentResponse.document_translation and
  // TranslateDocumentResponse.glossary_document_translation.
  oneof destination {
    // Optional. Google Cloud Storage destination for the translation output,
    // e.g., `gs://my_bucket/my_directory/`.
    //
    // The destination directory provided does not have to be empty, but the
    // bucket must exist. If a file with the same name as the output file
    // already exists in the destination an error will be returned.
    //
    // For a DocumentInputConfig.content provided document, the output file
    // will have the name "output_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    //
    // For a DocumentInputConfig.gcs_source provided document, the output file
    // will have a name according to its URI. For example: an input file with
    // URI: "gs://a/b/c.[extension]" stored in a gcs_destination bucket with
    // name "my_bucket" will have an output URI:
    // "gs://my_bucket/a_b_c_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    //
    // If the document was directly provided through the request, then the
    // output document will have the format:
    // "gs://my_bucket/translated_document_[trg]_translations.[ext]", where
    // - [trg] corresponds to the translated file's language code,
    // - [ext] corresponds to the translated file's extension according to its
    // mime type.
    //
    // If a glossary was provided, then the output URI for the glossary
    // translation will be equal to the default output URI but have
    // `glossary_translations` instead of `translations`. For the previous
    // example, its glossary URI would be:
    // "gs://my_bucket/a_b_c_[trg]_glossary_translations.[ext]".
    //
    // Thus the max number of output files will be 2 (Translated document,
    // Glossary translated document).
    //
    // Callers should expect no partial outputs. If there is any error during
    // document translation, no output will be stored in the Cloud Storage
    // bucket.
    GcsDestination gcs_destination = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. Specifies the translated document's mime_type.
  // If not specified, the translated file's mime type will be the same as the
  // input file's mime type.
  // Currently only support the output mime type to be the same as input mime
  // type.
  // - application/pdf
  // - application/vnd.openxmlformats-officedocument.wordprocessingml.document
  // - application/vnd.openxmlformats-officedocument.presentationml.presentation
  // - application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
  string mime_type = 3 [(google.api.field_behavior) = OPTIONAL];
}

682// The request message for document translation.
683message TranslateDocumentRequest {
684  // Required. Location to make a regional call.
685  //
686  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
687  //
688  // For global calls, use `projects/{project-number-or-id}/locations/global` or
689  // `projects/{project-number-or-id}`.
690  //
691  // A non-global location is required for requests using AutoML models or
692  // custom glossaries.
693  //
694  // Models and glossaries must be within the same region (have the same
695  // location-id); otherwise an INVALID_ARGUMENT (400) error is returned.
696  string parent = 1 [(google.api.field_behavior) = REQUIRED];
697
698  // Optional. The ISO-639 language code of the input document if known, for
699  // example, "en-US" or "sr-Latn". Supported language codes are listed in
700  // [Language Support](https://cloud.google.com/translate/docs/languages).
701  // If the source language isn't specified, the API attempts to identify it
702  // automatically and returns it within the response. The source language
703  // must be specified if the request contains a glossary or a custom model.
704  string source_language_code = 2 [(google.api.field_behavior) = OPTIONAL];
705
706  // Required. The ISO-639 language code to use for translation of the input
707  // document. Must be one of the language codes listed in Language Support.
708  string target_language_code = 3 [(google.api.field_behavior) = REQUIRED];
709
710  // Required. Input configuration of the document to translate.
711  DocumentInputConfig document_input_config = 4
712      [(google.api.field_behavior) = REQUIRED];
713
714  // Optional. Output configurations.
715  // Defines whether the output file should be stored within Cloud Storage, as
716  // well as the desired output format. If not provided, the translated file
717  // is only returned through a byte-stream and its output mime type is the
718  // same as the input file's mime type.
719  DocumentOutputConfig document_output_config = 5
720      [(google.api.field_behavior) = OPTIONAL];
721
722  // Optional. The `model` type requested for this translation.
723  //
724  // The format depends on the model type:
725  //
726  // - AutoML Translation models:
727  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
728  //
729  // - General (built-in) models:
730  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
731  //
732  //
733  // If not provided, the default Google model (NMT) is used for
734  // translation.
735  string model = 6 [(google.api.field_behavior) = OPTIONAL];
736
737  // Optional. Glossary to be applied. The glossary must be within the same
738  // region (have the same location-id) as the model; otherwise an
739  // INVALID_ARGUMENT (400) error is returned.
740  TranslateTextGlossaryConfig glossary_config = 7
741      [(google.api.field_behavior) = OPTIONAL];
742
743  // Optional. The labels with user-defined metadata for the request.
744  //
745  // Label keys and values can be no longer than 63 characters (Unicode
746  // codepoints) and can only contain lowercase letters, numeric characters,
747  // underscores and dashes. International characters are allowed. Label values
748  // are optional. Label keys must start with a letter.
749  //
750  // See https://cloud.google.com/translate/docs/advanced/labels for more
751  // information.
752  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];
753
754  // Optional. User-customized attribution.
755  // If not provided, the default is `Machine Translated by Google`.
756  // Customized attribution should follow the rules in
757  // https://cloud.google.com/translate/attribution#attribution_and_logos
758  string customized_attribution = 10 [(google.api.field_behavior) = OPTIONAL];
759
760  // Optional. If true, the page limit of online native PDF translation is 300
761  // and only native PDF pages will be translated.
762  bool is_translate_native_pdf_only = 11
763      [(google.api.field_behavior) = OPTIONAL];
764
765  // Optional. If true, uses text removal to remove the shadow text on the
766  // background image for native PDF translation.
767  // The shadow removal feature can only be enabled when
768  // `is_translate_native_pdf_only` is false.
769  bool enable_shadow_removal_native_pdf = 12
770      [(google.api.field_behavior) = OPTIONAL];
771}
772
773// A translated document message.
774message DocumentTranslation {
775  // The array of translated documents. It is expected to be size 1 for now. We
776  // may produce multiple translated documents in the future for other types of
777  // file formats.
778  repeated bytes byte_stream_outputs = 1;
779
780  // The mime type of the translated document.
781  string mime_type = 2;
782
783  // The detected language for the input document.
784  // If the user did not provide the source language for the input document,
785  // this field contains the automatically detected language code. If the
786  // source language was passed, auto-detection of the language does not occur
787  // and this field is empty.
788  string detected_language_code = 3;
789}
790
791// The response message for document translation.
792message TranslateDocumentResponse {
793  // The translated document.
794  DocumentTranslation document_translation = 1;
795
796  // The document's translation output if a glossary is provided in the request.
797  // This can be the same as [TranslateDocumentResponse.document_translation]
798  // if no glossary terms apply.
799  DocumentTranslation glossary_document_translation = 2;
800
801  // Only present when `model` is present in the request.
802  // `model` is normalized to have a project number.
803  //
804  // For example:
805  // If the `model` field in TranslateDocumentRequest is:
806  // `projects/{project-id}/locations/{location-id}/models/general/nmt` then
807  // `model` here would be normalized to
808  // `projects/{project-number}/locations/{location-id}/models/general/nmt`.
809  string model = 3;
810
811  // The `glossary_config` used for this translation.
812  TranslateTextGlossaryConfig glossary_config = 4;
813}
814
815// The request message for batch text translation.
816message BatchTranslateTextRequest {
817  // Required. Location to make a call. Must refer to a caller's project.
818  //
819  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
820  //
821  // The `global` location is not supported for batch translation.
822  //
823  // Only AutoML Translation models or glossaries within the same region (have
824  // the same location-id) can be used; otherwise an INVALID_ARGUMENT (400)
825  // error is returned.
826  string parent = 1 [
827    (google.api.field_behavior) = REQUIRED,
828    (google.api.resource_reference) = {
829      type: "locations.googleapis.com/Location"
830    }
831  ];
832
833  // Required. The source language code.
834  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];
835
836  // Required. The target language codes. Specify up to 10 language codes here.
837  repeated string target_language_codes = 3
838      [(google.api.field_behavior) = REQUIRED];
839
840  // Optional. The models to use for translation. Map's key is target language
841  // code. Map's value is the model name. Value can be a built-in general
842  // model, or an AutoML Translation model.
843  //
844  // The value format depends on the model type:
845  //
846  // - AutoML Translation models:
847  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
848  //
849  // - General (built-in) models:
850  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
851  //
852  //
853  // If the map is empty or a specific model is not requested for a language
854  // pair, then the default Google model (NMT) is used.
855  map<string, string> models = 4 [(google.api.field_behavior) = OPTIONAL];
856
857  // Required. Input configurations.
858  // The total number of files matched should be <= 100.
859  // The total content size should be <= 100M Unicode codepoints.
860  // The files must use UTF-8 encoding.
861  repeated InputConfig input_configs = 5
862      [(google.api.field_behavior) = REQUIRED];
863
864  // Required. Output configuration.
865  // If two input configs match the same file (that is, the same input path),
866  // we don't generate output for duplicate inputs.
867  OutputConfig output_config = 6 [(google.api.field_behavior) = REQUIRED];
868
869  // Optional. Glossaries to be applied for translation.
870  // The map is keyed by target language code.
871  map<string, TranslateTextGlossaryConfig> glossaries = 7
872      [(google.api.field_behavior) = OPTIONAL];
873
874  // Optional. The labels with user-defined metadata for the request.
875  //
876  // Label keys and values can be no longer than 63 characters
877  // (Unicode codepoints) and can only contain lowercase letters, numeric
878  // characters, underscores and dashes. International characters are allowed.
879  // Label values are optional. Label keys must start with a letter.
880  //
881  // See https://cloud.google.com/translate/docs/advanced/labels for more
882  // information.
883  map<string, string> labels = 9 [(google.api.field_behavior) = OPTIONAL];
884}
885
886// State metadata for the batch translation operation.
887message BatchTranslateMetadata {
888  // State of the job.
889  enum State {
890    // Invalid. The state is unspecified.
891    STATE_UNSPECIFIED = 0;
892
893    // The request is being processed.
894    RUNNING = 1;
895
896    // The batch is processed, and at least one item was successfully
897    // processed.
898    SUCCEEDED = 2;
899
900    // The batch is done and no item was successfully processed.
901    FAILED = 3;
902
903    // The request is in the process of being canceled after the caller invoked
904    // `longrunning.Operations.CancelOperation` on the request id.
905    CANCELLING = 4;
906
907    // The batch is done after the user has called
908    // `longrunning.Operations.CancelOperation`. Any records processed before
909    // the cancel command are output as specified in the request.
910    CANCELLED = 5;
911  }
912
913  // The state of the operation.
914  State state = 1;
915
916  // Number of successfully translated characters so far (Unicode codepoints).
917  int64 translated_characters = 2;
918
919  // Number of characters that have failed to process so far (Unicode
920  // codepoints).
921  int64 failed_characters = 3;
922
923  // Total number of characters (Unicode codepoints).
924  // This is the total number of codepoints from input files times the number
925  // of target languages. It appears here shortly after the call is submitted.
926  int64 total_characters = 4;
927
928  // The time when the operation was submitted.
929  google.protobuf.Timestamp submit_time = 5;
930}
931
932// Stored in the
933// [google.longrunning.Operation.response][google.longrunning.Operation.response]
934// field returned by BatchTranslateText if at least one sentence is translated
935// successfully.
936message BatchTranslateResponse {
937  // Total number of characters (Unicode codepoints).
938  int64 total_characters = 1;
939
940  // Number of successfully translated characters (Unicode codepoints).
941  int64 translated_characters = 2;
942
943  // Number of characters that have failed to process (Unicode codepoints).
944  int64 failed_characters = 3;
945
946  // The time when the operation was submitted.
947  google.protobuf.Timestamp submit_time = 4;
948
949  // The time when the operation finished and
950  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
951  // set to true.
952  google.protobuf.Timestamp end_time = 5;
953}
954
955// Input configuration for glossaries.
956message GlossaryInputConfig {
957  // Required. Specify the input.
958  oneof source {
959    // Required. Google Cloud Storage location of glossary data.
960    // File format is determined based on the filename extension. The API returns
961    // [google.rpc.Code.INVALID_ARGUMENT] for unsupported URIs and file
962    // formats. Wildcards are not allowed. This must be a single file in one of
963    // the following formats:
964    //
965    // For unidirectional glossaries:
966    //
967    // - TSV/CSV (`.tsv`/`.csv`): Two-column file, tab- or comma-separated.
968    //   The first column is source text. The second column is target text.
969    //   No headers in this file. The first row contains data and not column
970    //   names.
971    //
972    // - TMX (`.tmx`): TMX file with parallel data defining source/target term
973    //   pairs.
974    //
975    // For equivalent term set glossaries:
976    //
977    // - CSV (`.csv`): Multi-column CSV file defining equivalent glossary terms
978    //   in multiple languages. See documentation for more information -
979    //   [glossaries](https://cloud.google.com/translate/docs/advanced/glossary).
980    GcsSource gcs_source = 1;
981  }
982}
983
984// Represents a glossary built from user-provided data.
985message Glossary {
986  option (google.api.resource) = {
987    type: "translate.googleapis.com/Glossary"
988    pattern: "projects/{project}/locations/{location}/glossaries/{glossary}"
989  };
990
991  // A source/target language code pair. Used with unidirectional glossaries.
992  message LanguageCodePair {
993    // Required. The ISO-639 language code of the input text, for example,
994    // "en-US". Expected to be an exact match for GlossaryTerm.language_code.
995    string source_language_code = 1;
996
997    // Required. The ISO-639 language code for translation output, for example,
998    // "zh-CN". Expected to be an exact match for GlossaryTerm.language_code.
999    string target_language_code = 2;
1000  }
1001
1002  // A set of language codes. Used with equivalent term set glossaries.
1003  message LanguageCodesSet {
1004    // The ISO-639 language code(s) for terms defined in the glossary.
1005    // All entries are unique. The list contains at least two entries.
1006    // Expected to be an exact match for GlossaryTerm.language_code.
1007    repeated string language_codes = 1;
1008  }
1009
1010  // Required. The resource name of the glossary. Glossary names have the form
1011  // `projects/{project-number-or-id}/locations/{location-id}/glossaries/{glossary-id}`.
1012  string name = 1 [(google.api.field_behavior) = REQUIRED];
1013
1014  // Languages supported by the glossary.
1015  oneof languages {
1016    // Used with unidirectional glossaries.
1017    LanguageCodePair language_pair = 3;
1018
1019    // Used with equivalent term set glossaries.
1020    LanguageCodesSet language_codes_set = 4;
1021  }
1022
1023  // Required. Provides examples to build the glossary from.
1024  // The total glossary size must not exceed 10M Unicode codepoints.
1025  GlossaryInputConfig input_config = 5;
1026
1027  // Output only. The number of entries defined in the glossary.
1028  int32 entry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
1029
1030  // Output only. The time when CreateGlossary was called.
1031  google.protobuf.Timestamp submit_time = 7
1032      [(google.api.field_behavior) = OUTPUT_ONLY];
1033
1034  // Output only. The time when the glossary creation finished.
1035  google.protobuf.Timestamp end_time = 8
1036      [(google.api.field_behavior) = OUTPUT_ONLY];
1037
1038  // Optional. The display name of the glossary.
1039  string display_name = 9 [(google.api.field_behavior) = OPTIONAL];
1040}
1041
1042// Request message for CreateGlossary.
1043message CreateGlossaryRequest {
1044  // Required. The project name, given as a Location resource reference.
1045  string parent = 1 [
1046    (google.api.field_behavior) = REQUIRED,
1047    (google.api.resource_reference) = {
1048      type: "locations.googleapis.com/Location"
1049    }
1050  ];
1051
1052  // Required. The glossary to create.
1053  Glossary glossary = 2 [(google.api.field_behavior) = REQUIRED];
1054}
1055
1056// Request message for GetGlossary.
1057message GetGlossaryRequest {
1058  // Required. The resource name of the glossary to retrieve.
1059  string name = 1 [
1060    (google.api.field_behavior) = REQUIRED,
1061    (google.api.resource_reference) = {
1062      type: "translate.googleapis.com/Glossary"
1063    }
1064  ];
1065}
1066
1067// Request message for DeleteGlossary.
1068message DeleteGlossaryRequest {
1069  // Required. The resource name of the glossary to delete.
1070  string name = 1 [
1071    (google.api.field_behavior) = REQUIRED,
1072    (google.api.resource_reference) = {
1073      type: "translate.googleapis.com/Glossary"
1074    }
1075  ];
1076}
1077
1078// Request message for ListGlossaries.
1079message ListGlossariesRequest {
1080  // Required. The name of the project from which to list all of the glossaries.
1081  string parent = 1 [
1082    (google.api.field_behavior) = REQUIRED,
1083    (google.api.resource_reference) = {
1084      type: "locations.googleapis.com/Location"
1085    }
1086  ];
1087
1088  // Optional. Requested page size. The server may return fewer glossaries than
1089  // requested. If unspecified, the server picks an appropriate default.
1090  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];
1091
1092  // Optional. A token identifying a page of results the server should return.
1093  // Typically, this is the value of [ListGlossariesResponse.next_page_token]
1094  // returned from the previous call to the `ListGlossaries` method.
1095  // The first page is returned if `page_token` is empty or missing.
1096  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
1097
1098  // Optional. Filter specifying constraints of a list operation.
1099  // Specify the constraint in the format "key=value", where key must be
1100  // "src" or "tgt", and the value must be a valid language code.
1101  // For multiple restrictions, concatenate them with "AND" (uppercase only),
1102  // such as: "src=en-US AND tgt=zh-CN". Note that exact matching is used
1103  // here, which means using 'en-US' and 'en' can lead to different results,
1104  // depending on the language code you used when you created the glossary.
1105  // For the unidirectional glossaries, the "src" and "tgt" add restrictions
1106  // on the source and target language code separately.
1107  // For the equivalent term set glossaries, the "src" and/or "tgt" add
1108  // restrictions on the term set.
1109  // For example: "src=en-US AND tgt=zh-CN" will only pick the unidirectional
1110  // glossaries which exactly match the source language code as "en-US" and the
1111  // target language code "zh-CN", but all equivalent term set glossaries which
1112  // contain "en-US" and "zh-CN" in their language set will be picked.
1113  // If missing, no filtering is performed.
1114  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
1115}
1116
1117// Response message for ListGlossaries.
1118message ListGlossariesResponse {
1119  // The list of glossaries for a project.
1120  repeated Glossary glossaries = 1;
1121
1122  // A token to retrieve a page of results. Pass this value in the
1123  // [ListGlossariesRequest.page_token] field in the subsequent call to the
1124  // `ListGlossaries` method to retrieve the next page of results.
1125  string next_page_token = 2;
1126}
1127
1128// Stored in the
1129// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
1130// field returned by CreateGlossary.
1131message CreateGlossaryMetadata {
1132  // Enumerates the possible states that the creation request can be in.
1133  enum State {
1134    // Invalid. The state is unspecified.
1135    STATE_UNSPECIFIED = 0;
1136
1137    // The request is being processed.
1138    RUNNING = 1;
1139
1140    // The glossary was successfully created.
1141    SUCCEEDED = 2;
1142
1143    // Failed to create the glossary.
1144    FAILED = 3;
1145
1146    // The request is in the process of being canceled after the caller invoked
1147    // `longrunning.Operations.CancelOperation` on the request id.
1148    CANCELLING = 4;
1149
1150    // The glossary creation request was successfully canceled.
1151    CANCELLED = 5;
1152  }
1153
1154  // The name of the glossary that is being created.
1155  string name = 1;
1156
1157  // The current state of the glossary creation operation.
1158  State state = 2;
1159
1160  // The time when the operation was submitted to the server.
1161  google.protobuf.Timestamp submit_time = 3;
1162}
1163
1164// Stored in the
1165// [google.longrunning.Operation.metadata][google.longrunning.Operation.metadata]
1166// field returned by DeleteGlossary.
1167message DeleteGlossaryMetadata {
1168  // Enumerates the possible states that the deletion request can be in.
1169  enum State {
1170    // Invalid. The state is unspecified.
1171    STATE_UNSPECIFIED = 0;
1172
1173    // The request is being processed.
1174    RUNNING = 1;
1175
1176    // The glossary was successfully deleted.
1177    SUCCEEDED = 2;
1178
1179    // Failed to delete the glossary.
1180    FAILED = 3;
1181
1182    // The request is in the process of being canceled after the caller invoked
1183    // `longrunning.Operations.CancelOperation` on the request id.
1184    CANCELLING = 4;
1185
1186    // The glossary deletion request was successfully canceled.
1187    CANCELLED = 5;
1188  }
1189
1190  // The name of the glossary that is being deleted.
1191  string name = 1;
1192
1193  // The current state of the glossary deletion operation.
1194  State state = 2;
1195
1196  // The time when the operation was submitted to the server.
1197  google.protobuf.Timestamp submit_time = 3;
1198}
1199
1200// Stored in the
1201// [google.longrunning.Operation.response][google.longrunning.Operation.response]
1202// field returned by DeleteGlossary.
1203message DeleteGlossaryResponse {
1204  // The name of the deleted glossary.
1205  string name = 1;
1206
1207  // The time when the operation was submitted to the server.
1208  google.protobuf.Timestamp submit_time = 2;
1209
1210  // The time when the glossary deletion finished and
1211  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
1212  // set to true.
1213  google.protobuf.Timestamp end_time = 3;
1214}
1215
1216// The request message for BatchTranslateDocument.
1217message BatchTranslateDocumentRequest {
1218  // Required. Location to make a regional call.
1219  //
1220  // Format: `projects/{project-number-or-id}/locations/{location-id}`.
1221  //
1222  // The `global` location is not supported for batch translation.
1223  //
1224  // Only AutoML Translation models or glossaries within the same region (have
1225  // the same location-id) can be used; otherwise an INVALID_ARGUMENT (400)
1226  // error is returned.
1227  string parent = 1 [
1228    (google.api.field_behavior) = REQUIRED,
1229    (google.api.resource_reference) = {
1230      type: "locations.googleapis.com/Location"
1231    }
1232  ];
1233
1234  // Required. The ISO-639 language code of the input document if known, for
1235  // example, "en-US" or "sr-Latn". Supported language codes are listed in
1236  // [Language Support](https://cloud.google.com/translate/docs/languages).
1237  string source_language_code = 2 [(google.api.field_behavior) = REQUIRED];
1238
1239  // Required. The ISO-639 language codes to use for translation of the input
1240  // document. Specify up to 10 language codes here.
1241  repeated string target_language_codes = 3
1242      [(google.api.field_behavior) = REQUIRED];
1243
1244  // Required. Input configurations.
1245  // The total number of files matched should be <= 100.
1246  // The total content size to translate should be <= 100M Unicode codepoints.
1247  // The files must use UTF-8 encoding.
1248  repeated BatchDocumentInputConfig input_configs = 4
1249      [(google.api.field_behavior) = REQUIRED];
1250
1251  // Required. Output configuration.
1252  // If two input configs match the same file (that is, the same input path),
1253  // we don't generate output for duplicate inputs.
1254  BatchDocumentOutputConfig output_config = 5
1255      [(google.api.field_behavior) = REQUIRED];
1256
1257  // Optional. The models to use for translation. Map's key is target language
1258  // code. Map's value is the model name. Value can be a built-in general model,
1259  // or an AutoML Translation model.
1260  //
1261  // The value format depends on the model type:
1262  //
1263  // - AutoML Translation models:
1264  //   `projects/{project-number-or-id}/locations/{location-id}/models/{model-id}`
1265  //
1266  // - General (built-in) models:
1267  //   `projects/{project-number-or-id}/locations/{location-id}/models/general/nmt`
1268  //
1269  //
1270  // If the map is empty or a specific model is not requested for a language
1271  // pair, then the default Google model (NMT) is used.
1272  map<string, string> models = 6 [(google.api.field_behavior) = OPTIONAL];
1273
1274  // Optional. Glossaries to be applied. The map is keyed by target language code.
1275  map<string, TranslateTextGlossaryConfig> glossaries = 7
1276      [(google.api.field_behavior) = OPTIONAL];
1277
1278  // Optional. File format conversion map to be applied to all input files.
1279  // Map's key is the original mime_type. Map's value is the target mime_type of
1280  // translated documents.
1281  //
1282  // Supported file format conversions include:
1283  // - `application/pdf` to
1284  //   `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
1285  //
1286  // If nothing is specified, output files will be in the same format as the
1287  // original file.
1288  map<string, string> format_conversions = 8
1289      [(google.api.field_behavior) = OPTIONAL];
1290
1291  // Optional. User-customized attribution.
1292  // If not provided, the default is `Machine Translated by Google`.
1293  // Customized attribution should follow the rules in
1294  // https://cloud.google.com/translate/attribution#attribution_and_logos
1295  string customized_attribution = 10 [(google.api.field_behavior) = OPTIONAL];
1296}
1297
1298// Input configuration for BatchTranslateDocument request.
1299message BatchDocumentInputConfig {
1300  // Specify the input.
1301  oneof source {
1302    // Google Cloud Storage location for the source input.
1303    // This can be a single file (for example,
1304    // `gs://translation-test/input.docx`) or a wildcard (for example,
1305    // `gs://translation-test/*`).
1306    //
1307    // File mime type is determined based on extension. Supported mime types
1308    // include:
1309    // - `pdf`, application/pdf
1310    // - `docx`,
1311    //   application/vnd.openxmlformats-officedocument.wordprocessingml.document
1312    // - `pptx`,
1313    //   application/vnd.openxmlformats-officedocument.presentationml.presentation
1314    // - `xlsx`,
1315    //   application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
1316    //
1317    // The max file size to support for `.docx`, `.pptx` and `.xlsx` is 100MB.
1318    // The max file size to support for `.pdf` is 1GB and the max page limit is
1319    // 1000 pages.
1320    // The max file size to support for all input documents is 1GB.
1321    GcsSource gcs_source = 1;
1322  }
1323}
1324
1325// Output configuration for BatchTranslateDocument request.
1326message BatchDocumentOutputConfig {
1327  // The destination of output. The destination directory provided must exist
1328  // and be empty.
1329  oneof destination {
1330    // Google Cloud Storage destination for output content.
1331    // For every single input document (for example, gs://a/b/c.[extension]), we
1332    // generate at most 2 * n output files. (n is the # of target_language_codes
1333    // in the BatchTranslateDocumentRequest).
1334    //
1335    // While the input documents are being processed, we write/update an index
1336    // file `index.csv` under `gcs_destination.output_uri_prefix` (for example,
1337    // gs://translation_output/index.csv). The index file is generated/updated
1338    // as new files are being translated. The format is:
1339    //
1340    // input_document,target_language_code,translation_output,error_output,
1341    // glossary_translation_output,glossary_error_output
1342    //
1343    // `input_document` is one file we matched using gcs_source.input_uri.
1344    // `target_language_code` is provided in the request.
1345    // `translation_output` contains the translations. (details provided below)
1346    // `error_output` contains the error message during processing of the file.
1347    // Both `translation_output` and `error_output` could be empty strings if
1348    // we have no content to output.
1349    // `glossary_translation_output` and `glossary_error_output` are the
1350    // translated output/error when we apply glossaries. They could also be
1351    // empty if we have no content to output.
1352    //
1353    // Once a row is present in index.csv, the input/output matching never
1354    // changes. Callers should also expect all the content in the input
1355    // document to be processed and ready to be consumed (that is, no partial
1356    // output file is written).
1357    //
1358    // Because index.csv is updated continually during processing, make sure
1359    // that no custom retention policy is applied to the output bucket, since
1360    // it may prevent file updates.
1361    // (https://cloud.google.com/storage/docs/bucket-lock#retention-policy)
1362    //
1363    // The naming format of translation output files follows (for target
1364    // language code [trg]): `translation_output`:
1365    // gs://translation_output/a_b_c_[trg]_translation.[extension]
1366    // `glossary_translation_output`:
1367    // gs://translation_test/a_b_c_[trg]_glossary_translation.[extension] The
1368    // output document will maintain the same file format as the input document.
1369    //
1370    // The naming format of error output files follows (for target language code
1371    // [trg]): `error_output`: gs://translation_test/a_b_c_[trg]_errors.txt
1372    // `glossary_error_output`:
1373    // gs://translation_test/a_b_c_[trg]_glossary_translation.txt The error
1374    // output is a txt file containing error details.
1375    GcsDestination gcs_destination = 1;
1376  }
1377}
1378
1379// Stored in the
1380// [google.longrunning.Operation.response][google.longrunning.Operation.response]
1381// field returned by BatchTranslateDocument if at least one document is
1382// translated successfully.
1383message BatchTranslateDocumentResponse {
1384  // Total number of pages to translate in all documents. Documents without a
1385  // clear page definition (such as XLSX) are not counted.
1386  int64 total_pages = 1;
1387
1388  // Number of successfully translated pages in all documents. Documents without
1389  // a clear page definition (such as XLSX) are not counted.
1390  int64 translated_pages = 2;
1391
1392  // Number of pages that failed to process in all documents. Documents without
1393  // a clear page definition (such as XLSX) are not counted.
1394  int64 failed_pages = 3;
1395
1396  // Number of billable pages in documents with a clear page definition (such
1397  // as PDF, DOCX, PPTX).
1398  int64 total_billable_pages = 4;
1399
1400  // Total number of characters (Unicode codepoints) in all documents.
1401  int64 total_characters = 5;
1402
1403  // Number of successfully translated characters (Unicode codepoints) in all
1404  // documents.
1405  int64 translated_characters = 6;
1406
1407  // Number of characters that have failed to process (Unicode codepoints) in
1408  // all documents.
1409  int64 failed_characters = 7;
1410
1411  // Number of billable characters (Unicode codepoints) in documents without a
1412  // clear page definition, such as XLSX.
1413  int64 total_billable_characters = 8;
1414
1415  // The time when the operation was submitted.
1416  google.protobuf.Timestamp submit_time = 9;
1417
1418  // The time when the operation finished and
1419  // [google.longrunning.Operation.done][google.longrunning.Operation.done] is
1420  // set to true.
1421  google.protobuf.Timestamp end_time = 10;
1422}
1423
1424// State metadata for the batch document translation operation.
1425message BatchTranslateDocumentMetadata {
1426  // State of the job.
1427  enum State {
1428    // Invalid. The state is unspecified.
1429    STATE_UNSPECIFIED = 0;
1430
1431    // The request is being processed.
1432    RUNNING = 1;
1433
1434    // The batch is processed, and at least one item was successfully processed.
1435    SUCCEEDED = 2;
1436
1437    // The batch is done and no item was successfully processed.
1438    FAILED = 3;
1439
1440    // The request is in the process of being canceled after the caller invoked
1441    // `longrunning.Operations.CancelOperation` on the request id.
1442    CANCELLING = 4;
1443
1444    // The batch is done after the user has called
1445    // `longrunning.Operations.CancelOperation`. Any records processed before
1446    // the cancel command are output as specified in the request.
1447    CANCELLED = 5;
1448  }
1449
1450  // The state of the operation.
1451  State state = 1;
1452
1453  // Total number of pages to translate in all documents so far. Documents
1454  // without a clear page definition (such as XLSX) are not counted.
1455  int64 total_pages = 2;
1456
1457  // Number of successfully translated pages in all documents so far. Documents
1458  // without a clear page definition (such as XLSX) are not counted.
1459  int64 translated_pages = 3;
1460
1461  // Number of pages that failed to process in all documents so far. Documents
1462  // without a clear page definition (such as XLSX) are not counted.
1463  int64 failed_pages = 4;
1464
1465  // Number of billable pages in documents with a clear page definition (such
1466  // as PDF, DOCX, PPTX) so far.
1467  int64 total_billable_pages = 5;
1468
1469  // Total number of characters (Unicode codepoints) in all documents so far.
1470  int64 total_characters = 6;
1471
1472  // Number of successfully translated characters (Unicode codepoints) in all
1473  // documents so far.
1474  int64 translated_characters = 7;
1475
1476  // Number of characters that have failed to process (Unicode codepoints) in
1477  // all documents so far.
1478  int64 failed_characters = 8;
1479
1480  // Number of billable characters (Unicode codepoints) in documents without a
1481  // clear page definition (such as XLSX) so far.
1482  int64 total_billable_characters = 9;
1483
1484  // The time when the operation was submitted.
1485  google.protobuf.Timestamp submit_time = 10;
1486}
1487