// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.speech.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option go_package = "cloud.google.com/go/speech/apiv2/speechpb;speechpb";
option java_multiple_files = true;
option java_outer_classname = "CloudSpeechProto";
option java_package = "com.google.cloud.speech.v2";
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKey"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}"
};
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKeyVersion"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}"
};

// Enables speech transcription and resource management.
service Speech {
  option (google.api.default_host) = "speech.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc CreateRecognizer(CreateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/recognizers"
      body: "recognizer"
    };
    option (google.api.method_signature) = "parent,recognizer,recognizer_id";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists Recognizers.
  rpc ListRecognizers(ListRecognizersRequest)
      returns (ListRecognizersResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/recognizers"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [Recognizer][google.cloud.speech.v2.Recognizer]. Fails with
  // [NOT_FOUND][google.rpc.Code.NOT_FOUND] if the requested Recognizer doesn't
  // exist.
  rpc GetRecognizer(GetRecognizerRequest) returns (Recognizer) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UpdateRecognizer(UpdateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{recognizer.name=projects/*/locations/*/recognizers/*}"
      body: "recognizer"
    };
    option (google.api.method_signature) = "recognizer,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc DeleteRecognizer(DeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UndeleteRecognizer(UndeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/recognizers/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Performs synchronous Speech recognition: receive results after all audio
  // has been sent and processed.
  rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:recognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,content";
    option (google.api.method_signature) = "recognizer,config,config_mask,uri";
  }

  // Performs bidirectional streaming speech recognition: receive results while
  // sending audio. This method is only available via the gRPC API (not REST).
  rpc StreamingRecognize(stream StreamingRecognizeRequest)
      returns (stream StreamingRecognizeResponse) {}

  // Performs batch asynchronous speech recognition: send a request with N
  // audio files and receive a long-running operation that can be polled to see
  // when the transcriptions are finished.
  rpc BatchRecognize(BatchRecognizeRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:batchRecognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,files";
    option (google.longrunning.operation_info) = {
      response_type: "BatchRecognizeResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Returns the requested [Config][google.cloud.speech.v2.Config].
  rpc GetConfig(GetConfigRequest) returns (Config) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/config}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [Config][google.cloud.speech.v2.Config].
  rpc UpdateConfig(UpdateConfigRequest) returns (Config) {
    option (google.api.http) = {
      patch: "/v2/{config.name=projects/*/locations/*/config}"
      body: "config"
    };
    option (google.api.method_signature) = "config,update_mask";
  }

  // Creates a [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc CreateCustomClass(CreateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/customClasses"
      body: "custom_class"
    };
    option (google.api.method_signature) =
        "parent,custom_class,custom_class_id";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists CustomClasses.
  rpc ListCustomClasses(ListCustomClassesRequest)
      returns (ListCustomClassesResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/customClasses"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc GetCustomClass(GetCustomClassRequest) returns (CustomClass) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UpdateCustomClass(UpdateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{custom_class.name=projects/*/locations/*/customClasses/*}"
      body: "custom_class"
    };
    option (google.api.method_signature) = "custom_class,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc DeleteCustomClass(DeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UndeleteCustomClass(UndeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/customClasses/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Creates a [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc CreatePhraseSet(CreatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/phraseSets"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "parent,phrase_set,phrase_set_id";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists PhraseSets.
  rpc ListPhraseSets(ListPhraseSetsRequest) returns (ListPhraseSetsResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/phraseSets"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc GetPhraseSet(GetPhraseSetRequest) returns (PhraseSet) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UpdatePhraseSet(UpdatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "phrase_set,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc DeletePhraseSet(DeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UndeletePhraseSet(UndeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/phraseSets/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }
}

// Request message for the
// [CreateRecognizer][google.cloud.speech.v2.Speech.CreateRecognizer] method.
message CreateRecognizerRequest {
  // Required. The Recognizer to create.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the Recognizer, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the Recognizer, which will become the final component of
  // the Recognizer's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string recognizer_id = 3;

  // Required. The project and location where this Recognizer will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/Recognizer"
    }
  ];
}
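
// Illustrative example (editor's sketch, not part of the API definition): a
// minimal CreateRecognizerRequest in text format. The project, location, and
// recognizer_id values below are hypothetical.
//
//   parent: "projects/my-project/locations/global"
//   recognizer_id: "my-recognizer"
//   recognizer {
//     default_recognition_config {
//       auto_decoding_config {}
//       language_codes: "en-US"
//     }
//   }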

// Represents the metadata of a long-running operation.
message OperationMetadata {
  // The time the operation was created.
  google.protobuf.Timestamp create_time = 1;

  // The time the operation was last updated.
  google.protobuf.Timestamp update_time = 2;

  // The resource path for the target of the operation.
  string resource = 3;

  // The method that triggered the operation.
  string method = 4;

  // The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the content of the Operation is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 6 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKey"
  }];

  // The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the content of the Operation is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 7 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKeyVersion"
  }];

  // The request that spawned the Operation.
  oneof request {
    // The BatchRecognizeRequest that spawned the Operation.
    BatchRecognizeRequest batch_recognize_request = 8;

    // The CreateRecognizerRequest that spawned the Operation.
    CreateRecognizerRequest create_recognizer_request = 9;

    // The UpdateRecognizerRequest that spawned the Operation.
    UpdateRecognizerRequest update_recognizer_request = 10;

    // The DeleteRecognizerRequest that spawned the Operation.
    DeleteRecognizerRequest delete_recognizer_request = 11;

    // The UndeleteRecognizerRequest that spawned the Operation.
    UndeleteRecognizerRequest undelete_recognizer_request = 12;

    // The CreateCustomClassRequest that spawned the Operation.
    CreateCustomClassRequest create_custom_class_request = 13;

    // The UpdateCustomClassRequest that spawned the Operation.
    UpdateCustomClassRequest update_custom_class_request = 14;

    // The DeleteCustomClassRequest that spawned the Operation.
    DeleteCustomClassRequest delete_custom_class_request = 15;

    // The UndeleteCustomClassRequest that spawned the Operation.
    UndeleteCustomClassRequest undelete_custom_class_request = 16;

    // The CreatePhraseSetRequest that spawned the Operation.
    CreatePhraseSetRequest create_phrase_set_request = 17;

    // The UpdatePhraseSetRequest that spawned the Operation.
    UpdatePhraseSetRequest update_phrase_set_request = 18;

    // The DeletePhraseSetRequest that spawned the Operation.
    DeletePhraseSetRequest delete_phrase_set_request = 19;

    // The UndeletePhraseSetRequest that spawned the Operation.
    UndeletePhraseSetRequest undelete_phrase_set_request = 20;

    // The UpdateConfigRequest that spawned the Operation.
    UpdateConfigRequest update_config_request = 21 [deprecated = true];
  }

  // The percent progress of the Operation. Values can range from 0-100. If the
  // value is 100, then the operation is finished.
  int32 progress_percent = 22;

  // Specific metadata per RPC.
  oneof metadata {
    // Metadata specific to the BatchRecognize method.
    BatchRecognizeMetadata batch_recognize_metadata = 23;
  }
}

// Request message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersRequest {
  // Required. The project and location of Recognizers to list. The expected
  // format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of Recognizers to return. The service may return fewer
  // than this value. If unspecified, at most 5 Recognizers will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] must match
  // the call that provided the page token.
  string page_token = 3;

  // Whether to show resources that have been deleted.
  bool show_deleted = 4;
}
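
// Illustrative example (editor's sketch): fetching the second page of
// Recognizers. The page token value is hypothetical; it must come from the
// next_page_token of a previous ListRecognizersResponse, and all other
// fields must match the call that produced that token.
//
//   parent: "projects/my-project/locations/global"
//   page_size: 5
//   page_token: "<token-from-previous-response>"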

// Response message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersResponse {
  // The list of requested Recognizers.
  repeated Recognizer recognizers = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListRecognizersRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetRecognizer][google.cloud.speech.v2.Speech.GetRecognizer] method.
message GetRecognizerRequest {
  // Required. The name of the Recognizer to retrieve. The expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];
}

// Request message for the
// [UpdateRecognizer][google.cloud.speech.v2.Speech.UpdateRecognizer] method.
message UpdateRecognizerRequest {
  // Required. The Recognizer to update.
  //
  // The Recognizer's `name` field is used to identify the Recognizer to update.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields are
  // considered for update. Use `*` to update the entire Recognizer resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated Recognizer, but do not
  // actually update it.
  bool validate_only = 4;
}
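
// Illustrative example (editor's sketch): updating only the display name of
// an existing Recognizer. Because update_mask lists "display_name", every
// other field of the supplied Recognizer is ignored. Names are hypothetical.
//
//   recognizer {
//     name: "projects/my-project/locations/global/recognizers/my-recognizer"
//     display_name: "Updated display name"
//   }
//   update_mask { paths: "display_name" }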

// Request message for the
// [DeleteRecognizer][google.cloud.speech.v2.Speech.DeleteRecognizer] method.
message DeleteRecognizerRequest {
  // Required. The name of the Recognizer to delete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the deleted Recognizer, but do not
  // actually delete it.
  bool validate_only = 2;

  // If set to true, and the Recognizer is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeleteRecognizer][google.cloud.speech.v2.Speech.UndeleteRecognizer]
// method.
message UndeleteRecognizerRequest {
  // Required. The name of the Recognizer to undelete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the undeleted Recognizer, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 4;
}

// A Recognizer message. Stores recognition configuration and metadata.
message Recognizer {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Recognizer"
    pattern: "projects/{project}/locations/{location}/recognizers/{recognizer}"
    style: DECLARATIVE_FRIENDLY
  };

  // Set of states that define the lifecycle of a Recognizer.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The Recognizer is active and ready for use.
    ACTIVE = 2;

    // This Recognizer has been deleted.
    DELETED = 4;
  }

  // Output only. Identifier. The resource name of the Recognizer.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IDENTIFIER
  ];

  // Output only. System-assigned unique identifier for the Recognizer.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // User-settable, human-readable name for the Recognizer. Must be 63
  // characters or less.
  string display_name = 3;

  // Optional. This field is now deprecated. Prefer the
  // [`model`][google.cloud.speech.v2.RecognitionConfig.model] field in the
  // [`RecognitionConfig`][google.cloud.speech.v2.RecognitionConfig] message.
  //
  // Which model to use for recognition requests. Select the model best suited
  // to your domain to get best results.
  //
  // Guidance for choosing which model to use can be found in the [Transcription
  // Models
  // Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
  // and the models supported in each region can be found in the [Table Of
  // Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  string model = 4 [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Optional. This field is now deprecated. Prefer the
  // [`language_codes`][google.cloud.speech.v2.RecognitionConfig.language_codes]
  // field in the
  // [`RecognitionConfig`][google.cloud.speech.v2.RecognitionConfig] message.
  //
  // The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  //
  // Supported languages for each model are listed in the [Table of Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  //
  // If additional languages are provided, the recognition result will contain
  // recognition in the most likely language detected. The recognition result
  // will include the language tag of the language detected in the audio.
  // When you create or update a Recognizer, these values are
  // stored in normalized BCP-47 form. For example, "en-us" is stored as
  // "en-US".
  repeated string language_codes = 17
      [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Default configuration to use for requests with this Recognizer.
  // This can be overwritten by inline configuration in the
  // [RecognizeRequest.config][google.cloud.speech.v2.RecognizeRequest.config]
  // field.
  RecognitionConfig default_recognition_config = 6;

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must be 63 characters or less each.
  // At most 100 annotations.
  map<string, string> annotations = 7;

  // Output only. The Recognizer lifecycle state.
  State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this Recognizer was modified.
  google.protobuf.Timestamp update_time = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer was requested for deletion.
  google.protobuf.Timestamp delete_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer will be purged.
  google.protobuf.Timestamp expire_time = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this Recognizer is in the process of being
  // updated.
  bool reconciling = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 15 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 16 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}

// Automatically detected decoding parameters.
// Supported for the following encodings:
//
// * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
//
// * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
//
// * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
//
// * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
//
// * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
//
// * FLAC: FLAC frames in the "native FLAC" container format.
//
// * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
//
// * OGG_OPUS: Opus audio frames in an Ogg container.
//
// * WEBM_OPUS: Opus audio frames in a WebM container.
//
// * M4A: M4A audio format.
message AutoDetectDecodingConfig {}

// Explicitly specified decoding parameters.
message ExplicitDecodingConfig {
  // Supported audio data encodings.
  enum AudioEncoding {
    // Default value. This value is unused.
    AUDIO_ENCODING_UNSPECIFIED = 0;

    // Headerless 16-bit signed little-endian PCM samples.
    LINEAR16 = 1;

    // Headerless 8-bit companded mulaw samples.
    MULAW = 2;

    // Headerless 8-bit companded alaw samples.
    ALAW = 3;
  }

  // Required. Encoding of the audio data sent for recognition.
  AudioEncoding encoding = 1 [(google.api.field_behavior) = REQUIRED];

  // Sample rate in Hertz of the audio data sent for recognition. Valid
  // values are: 8000-48000. 16000 is optimal. For best results, set the
  // sampling rate of the audio source to 16000 Hz. If that's not possible, use
  // the native sample rate of the audio source (instead of re-sampling).
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  int32 sample_rate_hertz = 2;

  // Number of channels present in the audio data sent for recognition.
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  //
  // The maximum allowed value is 8.
  int32 audio_channel_count = 3;
}
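
// Illustrative example (editor's sketch): decoding parameters for headerless
// 16 kHz mono LINEAR16 audio, matching the guidance above that a 16000 Hz
// sample rate is optimal.
//
//   encoding: LINEAR16
//   sample_rate_hertz: 16000
//   audio_channel_count: 1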

// Configuration to enable speaker diarization.
message SpeakerDiarizationConfig {
  // Required. Minimum number of speakers in the conversation. This range gives
  // you more flexibility by allowing the system to automatically determine the
  // correct number of speakers.
  //
  // To fix the number of speakers detected in the audio, set
  // `min_speaker_count` = `max_speaker_count`.
  int32 min_speaker_count = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Maximum number of speakers in the conversation. Valid values are:
  // 1-6. Must be >= `min_speaker_count`. This range gives you more flexibility
  // by allowing the system to automatically determine the correct number of
  // speakers.
  int32 max_speaker_count = 3 [(google.api.field_behavior) = REQUIRED];
}
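
// Illustrative example (editor's sketch): fixing the detected speaker count
// at exactly two, following the note above that setting min_speaker_count
// equal to max_speaker_count pins the number of speakers.
//
//   min_speaker_count: 2
//   max_speaker_count: 2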

// Available recognition features.
message RecognitionFeatures {
  // Options for how to recognize multi-channel audio.
  enum MultiChannelMode {
    // Default value for the multi-channel mode. If the audio contains
    // multiple channels, only the first channel will be transcribed; other
    // channels will be ignored.
    MULTI_CHANNEL_MODE_UNSPECIFIED = 0;

    // If selected, each channel in the provided audio is transcribed
    // independently. This cannot be selected if the selected
    // [model][google.cloud.speech.v2.Recognizer.model] is `latest_short`.
    SEPARATE_RECOGNITION_PER_CHANNEL = 1;
  }

  // If set to `true`, the server will attempt to filter out profanities,
  // replacing all but the initial character in each filtered word with
  // asterisks, for instance, "f***". If set to `false` or omitted, profanities
  // won't be filtered out.
  bool profanity_filter = 1;

  // If `true`, the top result includes a list of words and the start and end
  // time offsets (timestamps) for those words. If `false`, no word-level time
  // offset information is returned. The default is `false`.
  bool enable_word_time_offsets = 2;

  // If `true`, the top result includes a list of words and the confidence for
  // those words. If `false`, no word-level confidence information is returned.
  // The default is `false`.
  bool enable_word_confidence = 3;

  // If `true`, adds punctuation to recognition result hypotheses. This feature
  // is only available in select languages. The default `false` value does not
  // add punctuation to result hypotheses.
  bool enable_automatic_punctuation = 4;

  // The spoken punctuation behavior for the call. If `true`, replaces spoken
  // punctuation with the corresponding symbols in the request. For example,
  // "how are you question mark" becomes "how are you?". See
  // https://cloud.google.com/speech-to-text/docs/spoken-punctuation for
  // support. If `false`, spoken punctuation is not replaced.
  bool enable_spoken_punctuation = 14;

  // The spoken emoji behavior for the call. If `true`, adds spoken emoji
  // formatting for the request. This will replace spoken emojis with the
  // corresponding Unicode symbols in the final transcript. If `false`, spoken
  // emojis are not replaced.
  bool enable_spoken_emojis = 15;

  // Mode for recognizing multi-channel audio.
  MultiChannelMode multi_channel_mode = 17;

  // Configuration to enable speaker diarization and set additional
  // parameters to make diarization better suited for your application.
  // When this is enabled, we send all the words from the beginning of the
  // audio for the top alternative in every consecutive STREAMING response.
  // This is done in order to improve our speaker tags as our models learn to
  // identify the speakers in the conversation over time.
  // For non-streaming requests, the diarization results will be provided only
  // in the top alternative of the FINAL SpeechRecognitionResult.
  SpeakerDiarizationConfig diarization_config = 9;

  // Maximum number of recognition hypotheses to be returned.
  // The server may return fewer than `max_alternatives`.
  // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
  // one. If omitted, will return a maximum of one.
  int32 max_alternatives = 16;
}

// Transcription normalization configuration. Use transcription normalization
// to automatically replace parts of the transcript with phrases of your
// choosing. For StreamingRecognize, this normalization only applies to stable
// partial transcripts (stability > 0.8) and final transcripts.
message TranscriptNormalization {
  // A single replacement configuration.
  message Entry {
    // What to replace. Max length is 100 characters.
    string search = 1;

    // What to replace with. Max length is 100 characters.
    string replace = 2;

    // Whether the search is case sensitive.
    bool case_sensitive = 3;
  }

  // A list of replacement entries. We will perform replacement with one entry
  // at a time. For example, the second entry in ["cat" => "dog", "mountain cat"
  // => "mountain dog"] will never be applied because we will always process the
  // first entry before it. At most 100 entries.
  repeated Entry entries = 1;
}
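
// Illustrative example (editor's sketch): the ordering pitfall described
// above, in text format. Entry 1 rewrites every "cat" (including the one in
// "mountain cat") to "dog" first, so entry 2's pattern can never match.
//
//   entries { search: "cat" replace: "dog" }
//   entries { search: "mountain cat" replace: "mountain dog" }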

// Translation configuration. Use to translate the given audio into text for the
// desired language.
message TranslationConfig {
  // Required. The language code to translate to.
  string target_language = 1 [(google.api.field_behavior) = REQUIRED];
}

// Provides "hints" to the speech recognizer to favor specific words and phrases
// in the results. PhraseSets can be specified as an inline resource, or a
// reference to an existing PhraseSet resource.
message SpeechAdaptation {
  // A biasing PhraseSet, which can be either a string referencing the name of
  // an existing PhraseSet resource, or an inline definition of a PhraseSet.
  message AdaptationPhraseSet {
    oneof value {
      // The name of an existing PhraseSet resource. The user must have read
      // access to the resource and it must not be deleted.
      string phrase_set = 1 [(google.api.resource_reference) = {
        type: "speech.googleapis.com/PhraseSet"
      }];

      // An inline defined PhraseSet.
      PhraseSet inline_phrase_set = 2;
    }
  }

  // A list of inline or referenced PhraseSets.
  repeated AdaptationPhraseSet phrase_sets = 1;

  // A list of inline CustomClasses. Existing CustomClass resources can be
  // referenced directly in a PhraseSet.
  repeated CustomClass custom_classes = 2;
}
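
// Illustrative example (editor's sketch): mixing a reference to an existing
// PhraseSet resource with an inline PhraseSet. The resource name and phrase
// are hypothetical, and the inline fields assume the PhraseSet/Phrase schema
// defined elsewhere in this file.
//
//   phrase_sets {
//     phrase_set: "projects/my-project/locations/global/phraseSets/my-set"
//   }
//   phrase_sets {
//     inline_phrase_set {
//       phrases { value: "Speech-to-Text" boost: 10.0 }
//     }
//   }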

// Provides information to the Recognizer that specifies how to process the
// recognition request.
message RecognitionConfig {
  // Decoding parameters for audio being sent for recognition.
  oneof decoding_config {
    // Automatically detect decoding parameters.
    // Preferred for supported formats.
    AutoDetectDecodingConfig auto_decoding_config = 7;

    // Explicitly specified decoding parameters.
    // Required if using headerless PCM audio (linear16, mulaw, alaw).
    ExplicitDecodingConfig explicit_decoding_config = 8;
  }

  // Optional. Which model to use for recognition requests. Select the model
  // best suited to your domain to get best results.
  //
  // Guidance for choosing which model to use can be found in the [Transcription
  // Models
  // Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
  // and the models supported in each region can be found in the [Table Of
  // Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  string model = 9 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  // Language tags are normalized to BCP-47 before they are used; for example,
  // "en-us" becomes "en-US".
  //
  // Supported languages for each model are listed in the [Table of Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  //
  // If additional languages are provided, the recognition result will contain
  // recognition in the most likely language detected. The recognition result
  // will include the language tag of the language detected in the audio.
  repeated string language_codes = 10 [(google.api.field_behavior) = OPTIONAL];

  // Speech recognition features to enable.
  RecognitionFeatures features = 2;

  // Speech adaptation context that weights recognizer predictions for specific
  // words and phrases.
  SpeechAdaptation adaptation = 6;

  // Optional. Use transcription normalization to automatically replace parts of
  // the transcript with phrases of your choosing. For StreamingRecognize, this
  // normalization only applies to stable partial transcripts (stability > 0.8)
  // and final transcripts.
  TranscriptNormalization transcript_normalization = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration used to automatically run translation on the given
  // audio to the desired language for supported models.
  TranslationConfig translation_config = 15
      [(google.api.field_behavior) = OPTIONAL];
}

// Request message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method. Either
// `content` or `uri` must be supplied. Supplying both or neither returns
// [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See [content
// limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognizeRequest {
  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
  // {recognizer} segment may be set to `_` to use an empty implicit Recognizer.
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.RecognizeRequest.config_mask] field
  // can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1;

  // The list of fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] that override the
  // values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.RecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 8;

  // The audio source, which is either inline content or a Google Cloud
  // Storage URI.
  oneof audio_source {
    // The audio data bytes encoded as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. As
    // with all bytes fields, proto buffers use a pure binary representation,
    // whereas JSON representations use base64.
    bytes content = 5;

    // URI that points to a file that contains audio data bytes as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. The file
    // must not be compressed (for example, gzip). Currently, only Google Cloud
    // Storage URIs are supported, which must be specified in the following
    // format: `gs://bucket_name/object_name` (other URI formats return
    // [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more
    // information, see [Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 6;
  }
}
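
// Illustrative example (editor's sketch): a RecognizeRequest that uses the
// implicit "_" Recognizer and supplies the full config inline, reading the
// audio from Cloud Storage. The project, bucket, and model name below are
// hypothetical placeholders.
//
//   recognizer: "projects/my-project/locations/global/recognizers/_"
//   config {
//     auto_decoding_config {}
//     language_codes: "en-US"
//     model: "long"
//   }
//   uri: "gs://my-bucket/my-audio.wav"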

// Metadata about the recognition request and response.
message RecognitionResponseMetadata {
  // When available, billed audio seconds for the corresponding request.
  google.protobuf.Duration total_billed_duration = 6;
}

// Alternative hypotheses (a.k.a. n-best list).
message SpeechRecognitionAlternative {
  // Transcript text representing the words that the user spoke.
  string transcript = 1;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users should
  // not rely on it to be always provided. The default of 0.0 is a sentinel
  // value indicating `confidence` was not set.
  float confidence = 2;

  // A list of word-specific information for each recognized word.
  // When the
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is set, you will see all the words from the beginning of the audio.
  repeated WordInfo words = 3;
}

// Word-specific information for recognized words.
message WordInfo {
  // Time offset relative to the beginning of the audio,
  // and corresponding to the start of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration start_offset = 1;

  // Time offset relative to the beginning of the audio,
  // and corresponding to the end of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration end_offset = 2;

  // The word corresponding to this set of information.
  string word = 3;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users should
  // not rely on it to be always provided. The default of 0.0 is a sentinel
  // value indicating `confidence` was not set.
  float confidence = 4;

  // A distinct label is assigned for every speaker within the audio. This field
  // specifies which one of those speakers was detected to have spoken this
  // word. `speaker_label` is set if
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is given and only in the top alternative.
  string speaker_label = 6;
}

// A speech recognition result corresponding to a portion of the audio.
message SpeechRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel.
  // For `audio_channel_count` = `N`, its output values can range from `1` to
  // `N`.
  int32 channel_tag = 2;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected as the most likely language spoken in the audio.
  string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Response message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method.
message RecognizeResponse {
  // Sequential list of transcription results corresponding to sequential
  // portions of audio.
  repeated SpeechRecognitionResult results = 3;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 2;
}

// Available recognition features specific to streaming recognition requests.
message StreamingRecognitionFeatures {
  // Events that a timeout can be set on for voice activity.
  message VoiceActivityTimeout {
    // Duration to timeout the stream if no speech begins. If this is set and
    // no speech is detected in this duration at the start of the stream, the
    // server will close the stream.
    google.protobuf.Duration speech_start_timeout = 1;

    // Duration to timeout the stream after speech ends. If this is set and no
    // speech is detected in this duration after speech was detected, the server
    // will close the stream.
    google.protobuf.Duration speech_end_timeout = 2;
  }

  // If `true`, responses with voice activity speech events will be returned as
  // they are detected.
  bool enable_voice_activity_events = 1;

  // Whether or not to stream interim results to the client. If set to true,
  // interim results will be streamed to the client. Otherwise, only the final
  // response will be streamed back.
  bool interim_results = 2;

  // If set, the server will automatically close the stream after the specified
  // duration has elapsed after the last VOICE_ACTIVITY speech event has been
  // sent. The field `enable_voice_activity_events` must also be set to true.
  VoiceActivityTimeout voice_activity_timeout = 3;
}

// Provides configuration information for the StreamingRecognize request.
message StreamingRecognitionConfig {
  // Required. Features and audio metadata to use for the Automatic Speech
  // Recognition. This field in combination with the
  // [config_mask][google.cloud.speech.v2.StreamingRecognitionConfig.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] that
  // override the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] override
  // the values in the Recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // Recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config]
  // completely overrides and replaces the config in the recognizer for this
  // recognition request.
  google.protobuf.FieldMask config_mask = 3;

  // Speech recognition features to enable specific to streaming audio
  // recognition requests.
  StreamingRecognitionFeatures streaming_features = 2;
}

// Request message for the
// [StreamingRecognize][google.cloud.speech.v2.Speech.StreamingRecognize]
// method. Multiple
// [StreamingRecognizeRequest][google.cloud.speech.v2.StreamingRecognizeRequest]
// messages are sent in one call.
//
// If the [Recognizer][google.cloud.speech.v2.Recognizer] referenced by
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer]
// contains a fully specified request configuration then the stream may only
// contain messages with only
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] set.
//
// Otherwise the first message must contain a
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer] and
// a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message that together fully specify the request configuration and must not
// contain [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio]. All
// subsequent messages must only have
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] set.
message StreamingRecognizeRequest {
  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
  // {recognizer} segment may be set to `_` to use an empty implicit Recognizer.
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  oneof streaming_request {
    // StreamingRecognitionConfig to be used in this recognition attempt.
    // If provided, it will override the default RecognitionConfig stored in the
    // Recognizer.
    StreamingRecognitionConfig streaming_config = 6;

    // Inline audio bytes to be recognized.
    // Maximum size for this field is 15 KB per request.
    bytes audio = 5;
  }
}
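
// Illustrative example (editor's sketch): the message sequence described
// above, shown as text-format requests. The first message carries the
// recognizer and streaming_config but no audio; every later message carries
// only audio bytes (15 KB or less each). Names are hypothetical.
//
//   # Message 1: configuration only.
//   recognizer: "projects/my-project/locations/global/recognizers/_"
//   streaming_config {
//     config { auto_decoding_config {} language_codes: "en-US" }
//     streaming_features { interim_results: true }
//   }
//
//   # Messages 2..N: audio only.
//   audio: "<raw audio bytes>"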

// Request message for the
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
// method.
message BatchRecognizeRequest {
  // Possible processing strategies for batch requests.
  enum ProcessingStrategy {
    // Default value for the processing strategy. The request is processed as
    // soon as it is received.
    PROCESSING_STRATEGY_UNSPECIFIED = 0;

    // If selected, processes the request during lower utilization periods for a
    // price discount. The request is fulfilled within 24 hours.
    DYNAMIC_BATCHING = 1;
  }

  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
  // {recognizer} segment may be set to `_` to use an empty implicit Recognizer.
  string recognizer = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.BatchRecognizeRequest.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 4;

  // The list of fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] that override
  // the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all given fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 5;

  // Audio files with file metadata for ASR.
  // The maximum number of files allowed to be specified is 5.
  repeated BatchRecognizeFileMetadata files = 3;

  // Configuration options for where to output the transcripts of each file.
  RecognitionOutputConfig recognition_output_config = 6;

  // Processing strategy to use for this request.
  ProcessingStrategy processing_strategy = 7;
}
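
// Illustrative example (editor's sketch): a BatchRecognizeRequest that
// transcribes one Cloud Storage file and writes the results back to Cloud
// Storage. The bucket names are hypothetical, and the files entry assumes
// the BatchRecognizeFileMetadata schema defined elsewhere in this file.
//
//   recognizer: "projects/my-project/locations/global/recognizers/_"
//   config { auto_decoding_config {} language_codes: "en-US" }
//   files { uri: "gs://my-bucket/audio.wav" }
//   recognition_output_config {
//     gcs_output_config { uri: "gs://my-bucket/transcripts" }
//   }
//   processing_strategy: DYNAMIC_BATCHING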
1285
1286// Output configurations for Cloud Storage.
1287message GcsOutputConfig {
1288  // The Cloud Storage URI prefix with which recognition results will be
1289  // written.
1290  string uri = 1;
1291}
1292
1293// Output configurations for inline response.
1294message InlineOutputConfig {}
1295
1296// Output configurations for serialized `BatchRecognizeResults` protos.
1297message NativeOutputFileFormatConfig {}
1298
1299// Output configurations for [WebVTT](https://www.w3.org/TR/webvtt1/) formatted
1300// subtitle file.
1301message VttOutputFileFormatConfig {}
1302
1303// Output configurations for [SubRip
1304// Text](https://www.matroska.org/technical/subtitles.html#srt-subtitles)
1305// formatted subtitle file.
1306message SrtOutputFileFormatConfig {}
1307
1308// Configuration for the format of the results stored to `output`.
1309message OutputFormatConfig {
1310  // Configuration for the native output format. If this field is set, or if no
1311  // other output format field is set, then transcripts will be written to the
1312  // sink in the native format.
1313  NativeOutputFileFormatConfig native = 1;
1314
1315  // Configuration for the VTT output format. If this field is set, then
1316  // transcripts will be written to the sink in the VTT format.
1317  VttOutputFileFormatConfig vtt = 2;
1318
1319  // Configuration for the SRT output format. If this field is set, then
1320  // transcripts will be written to the sink in the SRT format.
1321  SrtOutputFileFormatConfig srt = 3;
1322}
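
// For illustration only: because the format fields above are not a oneof, a
// request can (as a sketch) ask for SRT captions in addition to the native
// transcripts:
//
//   output_format_config {
//     native {}
//     srt {}
//   }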
1323
1324// Configuration options for the output(s) of recognition.
1325message RecognitionOutputConfig {
1326  oneof output {
1327    // If this message is populated, recognition results are written to the
1328    // provided Google Cloud Storage URI.
1329    GcsOutputConfig gcs_output_config = 1;
1330
1331    // If this message is populated, recognition results are provided in the
1332    // [BatchRecognizeResponse][google.cloud.speech.v2.BatchRecognizeResponse]
1333    // message of the Operation when completed. This is only supported when
1334    // calling [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
1335    // with just one audio file.
1336    InlineOutputConfig inline_response_config = 2;
1337  }
1338
1339  // Optional. Configuration for the format of the results stored to `output`.
1340  // If unspecified, transcripts will be written in the `NATIVE` format only.
1341  OutputFormatConfig output_format_config = 3
1342      [(google.api.field_behavior) = OPTIONAL];
1343}
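
// For illustration only: a sketch of a `RecognitionOutputConfig` for a
// single-file batch request whose results are returned inline in the
// Operation, formatted as VTT captions as well as native transcripts:
//
//   inline_response_config {}
//   output_format_config { native {} vtt {} }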
1344
1345// Response message for
1346// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize] that is
1347// packaged into a longrunning [Operation][google.longrunning.Operation].
1348message BatchRecognizeResponse {
1349  // Map from filename to the final result for that file.
1350  map<string, BatchRecognizeFileResult> results = 1;
1351
1352  // When available, billed audio seconds for the corresponding request.
1353  google.protobuf.Duration total_billed_duration = 2;
1354}
1355
1356// Output type for BatchRecognize transcripts written to Cloud Storage. Although
1357// this proto is never returned by this API directly, each Cloud Storage
1358// transcript file is this proto serialized, and should be parsed as such.
1359message BatchRecognizeResults {
1360  // Sequential list of transcription results corresponding to sequential
1361  // portions of audio.
1362  repeated SpeechRecognitionResult results = 1;
1363
1364  // Metadata about the recognition.
1365  RecognitionResponseMetadata metadata = 2;
1366}
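
// For illustration only: printed in proto text format, a transcript file
// written to Cloud Storage might look like the sketch below (the transcript
// itself is invented):
//
//   results {
//     alternatives { transcript: "to be or not to be" }
//   }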
1367
1368// Final results written to Cloud Storage.
1369message CloudStorageResult {
1370  // The Cloud Storage URI to which recognition results were written.
1371  string uri = 1;
1372
1373  // The Cloud Storage URI to which recognition results were written as VTT
1374  // formatted captions. This is populated only when `VTT` output is requested.
1375  string vtt_format_uri = 2;
1376
1377  // The Cloud Storage URI to which recognition results were written as SRT
1378  // formatted captions. This is populated only when `SRT` output is requested.
1379  string srt_format_uri = 3;
1380}
1381
1382// Final results returned inline in the recognition response.
1383message InlineResult {
1384  // The transcript for the audio file.
1385  BatchRecognizeResults transcript = 1;
1386
1387  // The transcript for the audio file as VTT formatted captions. This is
1388  // populated only when `VTT` output is requested.
1389  string vtt_captions = 2;
1390
1391  // The transcript for the audio file as SRT formatted captions. This is
1392  // populated only when `SRT` output is requested.
1393  string srt_captions = 3;
1394}
1395
1396// Final results for a single file.
1397message BatchRecognizeFileResult {
1398  // Error if one was encountered.
1399  google.rpc.Status error = 2;
1400
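  // Metadata about the recognition.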
1401  RecognitionResponseMetadata metadata = 3;
1402
1403  oneof result {
1404    // Recognition results written to Cloud Storage. This is
1405    // populated only when
1406    // [GcsOutputConfig][google.cloud.speech.v2.GcsOutputConfig] is set in
1407    // the
1408    // [RecognitionOutputConfig][google.cloud.speech.v2.RecognitionOutputConfig].
1409    CloudStorageResult cloud_storage_result = 5;
1410
1411    // Recognition results. This is populated only when
1412    // [InlineOutputConfig][google.cloud.speech.v2.InlineOutputConfig] is set in
1413    // the
1414    // [RecognitionOutputConfig][google.cloud.speech.v2.RecognitionOutputConfig].
1415    InlineResult inline_result = 6;
1416  }
1417
1418  // Deprecated. Use `cloud_storage_result.uri` instead.
1419  string uri = 1 [deprecated = true];
1420
1421  // Deprecated. Use `inline_result.transcript` instead.
1422  BatchRecognizeResults transcript = 4 [deprecated = true];
1423}
1424
1425// Metadata about transcription for a single file (for example, progress
1426// percent).
1427message BatchRecognizeTranscriptionMetadata {
1428  // How much of the file has been transcribed so far.
1429  int32 progress_percent = 1;
1430
1431  // Error if one was encountered.
1432  google.rpc.Status error = 2;
1433
1434  // The Cloud Storage URI to which recognition results will be written.
1435  string uri = 3;
1436}
1437
1438// Operation metadata for
1439// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize].
1440message BatchRecognizeMetadata {
1441  // Map from provided filename to the transcription metadata for that file.
1442  map<string, BatchRecognizeTranscriptionMetadata> transcription_metadata = 1;
1443}
1444
1445// Metadata about a single file in a batch for BatchRecognize.
1446message BatchRecognizeFileMetadata {
1447  // The audio source, which is a Google Cloud Storage URI.
1448  oneof audio_source {
1449    // Cloud Storage URI for the audio file.
1450    string uri = 1;
1451  }
1452
1453  // Features and audio metadata to use for the Automatic Speech Recognition.
1454  // This field in combination with the
1455  // [config_mask][google.cloud.speech.v2.BatchRecognizeFileMetadata.config_mask]
1456  // field can be used to override parts of the
1457  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
1458  // of the Recognizer resource as well as the
1459  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] at the
1460  // request level.
1461  RecognitionConfig config = 4;
1462
1463  // The list of fields in
1464  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] that
1465  // override the values in the
1466  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
1467  // of the recognizer during this recognition request. If no mask is provided,
1468  // all non-default valued fields in
1469  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] override
1470  // the values in the recognizer for this recognition request. If a mask is
1471  // provided, only the fields listed in the mask override the config in the
1472  // recognizer for this recognition request. If a wildcard (`*`) is provided,
1473  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config]
1474  // completely overrides and replaces the config in the recognizer for this
1475  // recognition request.
1476  google.protobuf.FieldMask config_mask = 5;
1477}
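
// For illustration only: a sketch of per-file metadata that overrides just
// one config field for one file, leaving the rest of the request-level and
// recognizer-level config intact. The URI is invented, and `model` is
// assumed to be a field of `RecognitionConfig` defined earlier in this file.
//
//   uri: "gs://my-bucket/meeting.flac"
//   config { model: "long" }
//   config_mask { paths: "model" }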
1478
1479// A streaming speech recognition result corresponding to a portion of the audio
1480// that is currently being processed.
1481message StreamingRecognitionResult {
1482  // May contain one or more recognition hypotheses. These alternatives are
1483  // ordered in terms of accuracy, with the top (first) alternative being the
1484  // most probable, as ranked by the recognizer.
1485  repeated SpeechRecognitionAlternative alternatives = 1;
1486
1487  // If `false`, this
1488  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult]
1489  // represents an interim result that may change. If `true`, this is the final
1490  // time the speech service will return this particular
1491  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult];
1492  // the recognizer will not return any further hypotheses for this portion of
1493  // the transcript and corresponding audio.
1494  bool is_final = 2;
1495
1496  // An estimate of the likelihood that the recognizer will not change its guess
1497  // about this interim result. Values range from 0.0 (completely unstable)
1498  // to 1.0 (completely stable). This field is only provided for interim results
1499  // ([is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`).
1500  // The default of 0.0 is a sentinel value indicating `stability` was not set.
1501  float stability = 3;
1502
1503  // Time offset of the end of this result relative to the beginning of the
1504  // audio.
1505  google.protobuf.Duration result_end_offset = 4;
1506
1507  // For multi-channel audio, this is the channel number corresponding to the
1508  // recognized result for the audio from that channel.
1509  // For
1510  // `audio_channel_count` = `N`, its output values can range from `1` to `N`.
1511  int32 channel_tag = 5;
1512
1513  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
1514  // language tag of the language in this result. This language code was
1515  // detected as the most likely language spoken in the audio.
1516  string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
1517}
1518
1519// `StreamingRecognizeResponse` is the only message returned to the client by
1520// `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
1521// messages are streamed back to the client. If there is no recognizable
1522// audio, then no messages are streamed back to the client.
1523//
1524// Here are some examples of `StreamingRecognizeResponse`s that might
1525// be returned while processing audio:
1526//
1527// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
1528//
1529// 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
1530//
1531// 3. results { alternatives { transcript: "to be" } stability: 0.9 }
1532//    results { alternatives { transcript: " or not to be" } stability: 0.01 }
1533//
1534// 4. results { alternatives { transcript: "to be or not to be"
1535//                             confidence: 0.92 }
1536//              alternatives { transcript: "to bee or not to bee" }
1537//              is_final: true }
1538//
1539// 5. results { alternatives { transcript: " that's" } stability: 0.01 }
1540//
1541// 6. results { alternatives { transcript: " that is" } stability: 0.9 }
1542//    results { alternatives { transcript: " the question" } stability: 0.01 }
1543//
1544// 7. results { alternatives { transcript: " that is the question"
1545//                             confidence: 0.98 }
1546//              alternatives { transcript: " that was the question" }
1547//              is_final: true }
1548//
1549// Notes:
1550//
1551// - Only two of the above responses, #4 and #7, contain final results; they are
1552//   indicated by `is_final: true`. Concatenating these together generates the
1553//   full transcript: "to be or not to be that is the question".
1554//
1555// - The others contain interim `results`. #3 and #6 contain two interim
1556//   `results`: the first portion has a high stability and is less likely to
1557//   change; the second portion has a low stability and is very likely to
1558//   change. A UI designer might choose to show only high stability `results`.
1559//
1560// - The specific `stability` and `confidence` values shown above are only for
1561//   illustrative purposes. Actual values may vary.
1562//
1563// - In each response, only one of these fields will be set:
1564//     `error`,
1565//     `speech_event_type`, or
1566//     one or more (repeated) `results`.
1567message StreamingRecognizeResponse {
1568  // Indicates the type of speech event.
1569  enum SpeechEventType {
1570    // No speech event specified.
1571    SPEECH_EVENT_TYPE_UNSPECIFIED = 0;
1572
1573    // This event indicates that the server has detected the end of the user's
1574    // speech utterance and expects no additional speech. Therefore, the server
1575    // will not process additional audio and will close the gRPC bidirectional
1576    // stream. This event is only sent if there was a force cutoff due to
1577    // silence being detected early. This event is only available through the
1578    // `latest_short` [model][google.cloud.speech.v2.Recognizer.model].
1579    END_OF_SINGLE_UTTERANCE = 1;
1580
1581    // This event indicates that the server has detected the beginning of human
1582    // voice activity in the stream. This event can be returned multiple times
1583    // if speech starts and stops repeatedly throughout the stream. This event
1584    // is only sent if `voice_activity_events` is set to true.
1585    SPEECH_ACTIVITY_BEGIN = 2;
1586
1587    // This event indicates that the server has detected the end of human voice
1588    // activity in the stream. This event can be returned multiple times if
1589    // speech starts and stops repeatedly throughout the stream. This event is
1590    // only sent if `voice_activity_events` is set to true.
1591    SPEECH_ACTIVITY_END = 3;
1592  }
1593
1594  // This repeated list contains zero or more results that
1595  // correspond to consecutive portions of the audio currently being processed.
1596  // It contains zero or one
1597  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`true`
1598  // result (the newly settled portion), followed by zero or more
1599  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`
1600  // results (the interim results).
1601  repeated StreamingRecognitionResult results = 6;
1602
1603  // Indicates the type of speech event.
1604  SpeechEventType speech_event_type = 3;
1605
1606  // Time offset between the beginning of the audio and event emission.
1607  google.protobuf.Duration speech_event_offset = 7;
1608
1609  // Metadata about the recognition.
1610  RecognitionResponseMetadata metadata = 5;
1611}
1612
1613// Message representing the config for the Speech-to-Text API. This includes an
1614// optional [KMS key](https://cloud.google.com/kms/docs/resource-hierarchy#keys)
1615// with which incoming data will be encrypted.
1616message Config {
1617  option (google.api.resource) = {
1618    type: "speech.googleapis.com/Config"
1619    pattern: "projects/{project}/locations/{location}/config"
1620  };
1621
1622  // Output only. Identifier. The name of the config resource. There is exactly
1623  // one config resource per project per location. The expected format is
1624  // `projects/{project}/locations/{location}/config`.
1625  string name = 1 [
1626    (google.api.field_behavior) = OUTPUT_ONLY,
1627    (google.api.field_behavior) = IDENTIFIER
1628  ];
1629
1630  // Optional. An optional [KMS key
1631  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) that, if
1632  // present, will be used to encrypt Speech-to-Text resources at rest. Updating
1633  // this key will not encrypt existing resources using this key; only new
1634  // resources will be encrypted using this key. The expected format is
1635  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
1636  string kms_key_name = 2 [
1637    (google.api.field_behavior) = OPTIONAL,
1638    (google.api.resource_reference) = {
1639      type: "cloudkms.googleapis.com/CryptoKey"
1640    }
1641  ];
1642
1643  // Output only. The most recent time this resource was modified.
1644  google.protobuf.Timestamp update_time = 3
1645      [(google.api.field_behavior) = OUTPUT_ONLY];
1646}
1647
1648// Request message for the
1649// [GetConfig][google.cloud.speech.v2.Speech.GetConfig] method.
1650message GetConfigRequest {
1651  // Required. The name of the config to retrieve. There is exactly one config
1652  // resource per project per location. The expected format is
1653  // `projects/{project}/locations/{location}/config`.
1654  string name = 1 [
1655    (google.api.field_behavior) = REQUIRED,
1656    (google.api.resource_reference) = { type: "speech.googleapis.com/Config" }
1657  ];
1658}
1659
1660// Request message for the
1661// [UpdateConfig][google.cloud.speech.v2.Speech.UpdateConfig] method.
1662message UpdateConfigRequest {
1663  // Required. The config to update.
1664  //
1665  // The config's `name` field is used to identify the config to be updated.
1666  // The expected format is `projects/{project}/locations/{location}/config`.
1667  Config config = 1 [(google.api.field_behavior) = REQUIRED];
1668
1669  // The list of fields to be updated.
1670  google.protobuf.FieldMask update_mask = 2;
1671}
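
// For illustration only: a sketch of an `UpdateConfigRequest` that sets only
// the KMS key on a project's config (the project and key names are invented):
//
//   config {
//     name: "projects/my-project/locations/us-central1/config"
//     kms_key_name: "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key"
//   }
//   update_mask { paths: "kms_key_name" }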
1672
1673// CustomClass for biasing in speech recognition. Used to define a set of words
1674// or phrases that represents a common concept or theme likely to appear in your
1675// audio, for example a list of passenger ship names.
1676message CustomClass {
1677  option (google.api.resource) = {
1678    type: "speech.googleapis.com/CustomClass"
1679    pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
1680    style: DECLARATIVE_FRIENDLY
1681  };
1682
1683  // An item of the class.
1684  message ClassItem {
1685    // The class item's value.
1686    string value = 1;
1687  }
1688
1689  // Set of states that define the lifecycle of a CustomClass.
1690  enum State {
1691    // Unspecified state. This is used only to distinguish
1692    // unset values.
1693    STATE_UNSPECIFIED = 0;
1694
1695    // The normal and active state.
1696    ACTIVE = 2;
1697
1698    // This CustomClass has been deleted.
1699    DELETED = 4;
1700  }
1701
1702  // Output only. Identifier. The resource name of the CustomClass.
1703  // Format:
1704  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
1705  string name = 1 [
1706    (google.api.field_behavior) = OUTPUT_ONLY,
1707    (google.api.field_behavior) = IDENTIFIER
1708  ];
1709
1710  // Output only. System-assigned unique identifier for the CustomClass.
1711  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
1712
1713  // Optional. User-settable, human-readable name for the CustomClass. Must be
1714  // 63 characters or less.
1715  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];
1716
1717  // A collection of class items.
1718  repeated ClassItem items = 5;
1719
1720  // Output only. The CustomClass lifecycle state.
1721  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];
1722
1723  // Output only. Creation time.
1724  google.protobuf.Timestamp create_time = 6
1725      [(google.api.field_behavior) = OUTPUT_ONLY];
1726
1727  // Output only. The most recent time this resource was modified.
1728  google.protobuf.Timestamp update_time = 7
1729      [(google.api.field_behavior) = OUTPUT_ONLY];
1730
1731  // Output only. The time at which this resource was requested for deletion.
1732  google.protobuf.Timestamp delete_time = 8
1733      [(google.api.field_behavior) = OUTPUT_ONLY];
1734
1735  // Output only. The time at which this resource will be purged.
1736  google.protobuf.Timestamp expire_time = 9
1737      [(google.api.field_behavior) = OUTPUT_ONLY];
1738
1739  // Optional. Allows users to store small amounts of arbitrary data.
1740  // Both the key and the value must be 63 characters or less each.
1741  // At most 100 annotations.
1742  map<string, string> annotations = 10 [(google.api.field_behavior) = OPTIONAL];
1743
1744  // Output only. This checksum is computed by the server based on the value of
1745  // other fields. This may be sent on update, undelete, and delete requests to
1746  // ensure the client has an up-to-date value before proceeding.
1747  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];
1748
1749  // Output only. Whether or not this CustomClass is in the process of being
1750  // updated.
1751  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
1752
1753  // Output only. The [KMS key
1754  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
1755  // the CustomClass is encrypted. The expected format is
1756  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
1757  string kms_key_name = 13 [
1758    (google.api.field_behavior) = OUTPUT_ONLY,
1759    (google.api.resource_reference) = {
1760      type: "cloudkms.googleapis.com/CryptoKey"
1761    }
1762  ];
1763
1764  // Output only. The [KMS key version
1765  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
1766  // with which the CustomClass is encrypted. The expected format is
1767  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
1768  string kms_key_version_name = 14 [
1769    (google.api.field_behavior) = OUTPUT_ONLY,
1770    (google.api.resource_reference) = {
1771      type: "cloudkms.googleapis.com/CryptoKeyVersion"
1772    }
1773  ];
1774}
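
// For illustration only: a sketch of a user-supplied CustomClass holding the
// passenger ship names mentioned above (all values are invented; the
// output-only fields are omitted because the server assigns them):
//
//   display_name: "passenger-ships"
//   items { value: "Queen Mary" }
//   items { value: "Titanic" }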
1775
1776// PhraseSet for biasing in speech recognition. A PhraseSet is used to provide
1777// "hints" to the speech recognizer to favor specific words and phrases in the
1778// results.
1779message PhraseSet {
1780  option (google.api.resource) = {
1781    type: "speech.googleapis.com/PhraseSet"
1782    pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
1783    style: DECLARATIVE_FRIENDLY
1784  };
1785
1786  // A Phrase contains words and phrase "hints" so that the speech recognition
1787  // is more likely to recognize them. This can be used to improve the accuracy
1788  // for specific words and phrases, for example, if specific commands are
1789  // typically spoken by the user. This can also be used to add additional words
1790  // to the vocabulary of the recognizer.
1791  //
1792  // List items can also include CustomClass references containing groups of
1793  // words that represent common concepts that occur in natural language.
1794  message Phrase {
1795    // The phrase itself.
1796    string value = 1;
1797
1798    // Hint Boost. Overrides the boost set at the phrase set level.
1799    // A positive value increases the probability that a specific phrase will
1800    // be recognized over other similar-sounding phrases. The higher the boost,
1801    // the higher the chance of false positive recognition as well. Negative
1802    // boost values would correspond to anti-biasing, but anti-biasing is not
1803    // enabled, so negative boost values will return an error. Boost values must
1804    // be between 0 and 20; any value outside that range will return an error.
1805    // We recommend using a binary search approach to find the optimal value
1806    // for your use case, as well as adding phrases both with and without boost
1807    // to your requests.
1808    float boost = 2;
1809  }
1810
1811  // Set of states that define the lifecycle of a PhraseSet.
1812  enum State {
1813    // Unspecified state. This is used only to distinguish
1814    // unset values.
1815    STATE_UNSPECIFIED = 0;
1816
1817    // The normal and active state.
1818    ACTIVE = 2;
1819
1820    // This PhraseSet has been deleted.
1821    DELETED = 4;
1822  }
1823
1824  // Output only. Identifier. The resource name of the PhraseSet.
1825  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
1826  string name = 1 [
1827    (google.api.field_behavior) = OUTPUT_ONLY,
1828    (google.api.field_behavior) = IDENTIFIER
1829  ];
1830
1831  // Output only. System-assigned unique identifier for the PhraseSet.
1832  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
1833
1834  // A list of words and phrases.
1835  repeated Phrase phrases = 3;
1836
1837  // Hint Boost. A positive value increases the probability that a specific
1838  // phrase will be recognized over other similar-sounding phrases. The higher
1839  // the boost, the higher the chance of false positive recognition as well.
1840  // Valid `boost` values are between 0 (exclusive) and 20. We recommend using a
1841  // binary search approach to find the optimal value for your use case, as
1842  // well as adding phrases both with and without boost to your requests.
1843  float boost = 4;
1844
1845  // User-settable, human-readable name for the PhraseSet. Must be 63
1846  // characters or less.
1847  string display_name = 5;
1848
1849  // Output only. The PhraseSet lifecycle state.
1850  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];
1851
1852  // Output only. Creation time.
1853  google.protobuf.Timestamp create_time = 6
1854      [(google.api.field_behavior) = OUTPUT_ONLY];
1855
1856  // Output only. The most recent time this resource was modified.
1857  google.protobuf.Timestamp update_time = 7
1858      [(google.api.field_behavior) = OUTPUT_ONLY];
1859
1860  // Output only. The time at which this resource was requested for deletion.
1861  google.protobuf.Timestamp delete_time = 8
1862      [(google.api.field_behavior) = OUTPUT_ONLY];
1863
1864  // Output only. The time at which this resource will be purged.
1865  google.protobuf.Timestamp expire_time = 9
1866      [(google.api.field_behavior) = OUTPUT_ONLY];
1867
1868  // Allows users to store small amounts of arbitrary data.
1869  // Both the key and the value must be 63 characters or less each.
1870  // At most 100 annotations.
1871  map<string, string> annotations = 10;
1872
1873  // Output only. This checksum is computed by the server based on the value of
1874  // other fields. This may be sent on update, undelete, and delete requests to
1875  // ensure the client has an up-to-date value before proceeding.
1876  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];
1877
1878  // Output only. Whether or not this PhraseSet is in the process of being
1879  // updated.
1880  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];
1881
1882  // Output only. The [KMS key
1883  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
1884  // the PhraseSet is encrypted. The expected format is
1885  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
1886  string kms_key_name = 13 [
1887    (google.api.field_behavior) = OUTPUT_ONLY,
1888    (google.api.resource_reference) = {
1889      type: "cloudkms.googleapis.com/CryptoKey"
1890    }
1891  ];
1892
1893  // Output only. The [KMS key version
1894  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
1895  // with which the PhraseSet is encrypted. The expected format is
1896  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
1897  string kms_key_version_name = 14 [
1898    (google.api.field_behavior) = OUTPUT_ONLY,
1899    (google.api.resource_reference) = {
1900      type: "cloudkms.googleapis.com/CryptoKeyVersion"
1901    }
1902  ];
1903}
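
// For illustration only: a sketch of a PhraseSet that biases toward a few
// media commands, boosting one phrase above the set-level default (all
// values are invented and lie in the documented 0-20 range):
//
//   display_name: "media-commands"
//   boost: 10
//   phrases { value: "pause playback" }
//   phrases { value: "play the next track" boost: 15 }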
1904
1905// Request message for the
1906// [CreateCustomClass][google.cloud.speech.v2.Speech.CreateCustomClass] method.
1907message CreateCustomClassRequest {
1908  // Required. The CustomClass to create.
1909  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];
1910
1911  // If set, validate the request and preview the CustomClass, but do not
1912  // actually create it.
1913  bool validate_only = 2;
1914
1915  // The ID to use for the CustomClass, which will become the final component of
1916  // the CustomClass's resource name.
1917  //
1918  // This value should be 4-63 characters, and valid characters
1919  // are /[a-z][0-9]-/.
1920  string custom_class_id = 3;
1921
1922  // Required. The project and location where this CustomClass will be created.
1923  // The expected format is `projects/{project}/locations/{location}`.
1924  string parent = 4 [
1925    (google.api.field_behavior) = REQUIRED,
1926    (google.api.resource_reference) = {
1927      child_type: "speech.googleapis.com/CustomClass"
1928    }
1929  ];
1930}
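
// For illustration only: a sketch of a `CreateCustomClassRequest` whose
// `custom_class_id` satisfies the 4-63 character rule above (names are
// invented):
//
//   parent: "projects/my-project/locations/global"
//   custom_class_id: "ship-names"
//   custom_class { items { value: "Queen Mary" } }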
1931
1932// Request message for the
1933// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] method.
1934message ListCustomClassesRequest {
1935  // Required. The project and location of CustomClass resources to list. The
1936  // expected format is `projects/{project}/locations/{location}`.
1937  string parent = 1 [
1938    (google.api.field_behavior) = REQUIRED,
1939    (google.api.resource_reference) = {
1940      type: "locations.googleapis.com/Location"
1941    }
1942  ];
1943
1944  // Number of results per request. A valid page_size ranges from 0 to 100,
1945  // inclusive. If the page_size is zero or unspecified, a page size of 5 will
1946  // be chosen. If the page size exceeds 100, it will be coerced down to 100.
1947  // Note that a call might return fewer results than the requested page size.
1948  int32 page_size = 2;
1949
1950  // A page token, received from a previous
1951  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] call.
1952  // Provide this to retrieve the subsequent page.
1953  //
1954  // When paginating, all other parameters provided to
1955  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] must
1956  // match the call that provided the page token.
1957  string page_token = 3;
1958
1959  // Whether or not to show resources that have been deleted.
1960  bool show_deleted = 4;
1961}
1962
1963// Response message for the
1964// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] method.
1965message ListCustomClassesResponse {
1966  // The list of requested CustomClasses.
1967  repeated CustomClass custom_classes = 1;
1968
1969  // A token, which can be sent as
1970  // [page_token][google.cloud.speech.v2.ListCustomClassesRequest.page_token] to
1971  // retrieve the next page. If this field is omitted, there are no subsequent
1972  // pages. This token expires after 72 hours.
1973  string next_page_token = 2;
1974}
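
// For illustration only: to page through CustomClasses 20 at a time, a
// caller might first send (the parent is invented):
//
//   parent: "projects/my-project/locations/global"
//   page_size: 20
//
// and then, while the response carries a non-empty `next_page_token`, repeat
// the same request with `page_token` set to that value.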
1975
1976// Request message for the
1977// [GetCustomClass][google.cloud.speech.v2.Speech.GetCustomClass] method.
1978message GetCustomClassRequest {
1979  // Required. The name of the CustomClass to retrieve. The expected format is
1980  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
1981  string name = 1 [
1982    (google.api.field_behavior) = REQUIRED,
1983    (google.api.resource_reference) = {
1984      type: "speech.googleapis.com/CustomClass"
1985    }
1986  ];
1987}
1988
1989// Request message for the
1990// [UpdateCustomClass][google.cloud.speech.v2.Speech.UpdateCustomClass] method.
1991message UpdateCustomClassRequest {
1992  // Required. The CustomClass to update.
1993  //
1994  // The CustomClass's `name` field is used to identify the CustomClass to
1995  // update. Format:
1996  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
1997  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];
1998
1999  // The list of fields to be updated. If empty, all fields are considered for
2000  // update.
2001  google.protobuf.FieldMask update_mask = 2;
2002
2003  // If set, validate the request and preview the updated CustomClass, but do
2004  // not actually update it.
2005  bool validate_only = 4;
2006}
2007
2008// Request message for the
2009// [DeleteCustomClass][google.cloud.speech.v2.Speech.DeleteCustomClass] method.
2010message DeleteCustomClassRequest {
2011  // Required. The name of the CustomClass to delete.
2012  // Format:
2013  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
2014  string name = 1 [
2015    (google.api.field_behavior) = REQUIRED,
2016    (google.api.resource_reference) = {
2017      type: "speech.googleapis.com/CustomClass"
2018    }
2019  ];
2020
2021  // If set, validate the request and preview the deleted CustomClass, but do
2022  // not actually delete it.
2023  bool validate_only = 2;
2024
2025  // If set to true, and the CustomClass is not found, the request will succeed
2026  // and be a no-op (no Operation is recorded in this case).
2027  bool allow_missing = 4;
2028
2029  // This checksum is computed by the server based on the value of other
2030  // fields. This may be sent on update, undelete, and delete requests to ensure
2031  // the client has an up-to-date value before proceeding.
2032  string etag = 3;
2033}
2034
2035// Request message for the
2036// [UndeleteCustomClass][google.cloud.speech.v2.Speech.UndeleteCustomClass]
2037// method.
2038message UndeleteCustomClassRequest {
2039  // Required. The name of the CustomClass to undelete.
2040  // Format:
2041  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
2042  string name = 1 [
2043    (google.api.field_behavior) = REQUIRED,
2044    (google.api.resource_reference) = {
2045      type: "speech.googleapis.com/CustomClass"
2046    }
2047  ];
2048
2049  // If set, validate the request and preview the undeleted CustomClass, but do
2050  // not actually undelete it.
2051  bool validate_only = 3;
2052
2053  // This checksum is computed by the server based on the value of other
2054  // fields. This may be sent on update, undelete, and delete requests to ensure
2055  // the client has an up-to-date value before proceeding.
2056  string etag = 4;
2057}
2058
2059// Request message for the
2060// [CreatePhraseSet][google.cloud.speech.v2.Speech.CreatePhraseSet] method.
2061message CreatePhraseSetRequest {
2062  // Required. The PhraseSet to create.
2063  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];
2064
2065  // If set, validate the request and preview the PhraseSet, but do not
2066  // actually create it.
2067  bool validate_only = 2;
2068
2069  // The ID to use for the PhraseSet, which will become the final component of
2070  // the PhraseSet's resource name.
2071  //
2072  // This value should be 4-63 characters, and valid characters
2073  // are /[a-z][0-9]-/.
2074  string phrase_set_id = 3;
2075
2076  // Required. The project and location where this PhraseSet will be created.
2077  // The expected format is `projects/{project}/locations/{location}`.
2078  string parent = 4 [
2079    (google.api.field_behavior) = REQUIRED,
2080    (google.api.resource_reference) = {
2081      child_type: "speech.googleapis.com/PhraseSet"
2082    }
2083  ];
2084}
2085
2086// Request message for the
2087// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
2088message ListPhraseSetsRequest {
2089  // Required. The project and location of PhraseSet resources to list. The
2090  // expected format is `projects/{project}/locations/{location}`.
2091  string parent = 1 [
2092    (google.api.field_behavior) = REQUIRED,
2093    (google.api.resource_reference) = {
2094      type: "locations.googleapis.com/Location"
2095    }
2096  ];
2097
2098  // The maximum number of PhraseSets to return. The service may return fewer
2099  // than this value. If unspecified, at most 5 PhraseSets will be returned.
2100  // The maximum value is 100; values above 100 will be coerced to 100.
2101  int32 page_size = 2;
2102
2103  // A page token, received from a previous
2104  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] call.
2105  // Provide this to retrieve the subsequent page.
2106  //
2107  // When paginating, all other parameters provided to
2108  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] must match
2109  // the call that provided the page token.
2110  string page_token = 3;
2111
2112  // Whether or not to show resources that have been deleted.
2113  bool show_deleted = 4;
2114}
2115
2116// Response message for the
2117// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
2118message ListPhraseSetsResponse {
2119  // The list of requested PhraseSets.
2120  repeated PhraseSet phrase_sets = 1;
2121
2122  // A token, which can be sent as
2123  // [page_token][google.cloud.speech.v2.ListPhraseSetsRequest.page_token] to
2124  // retrieve the next page. If this field is omitted, there are no subsequent
2125  // pages. This token expires after 72 hours.
2126  string next_page_token = 2;
2127}
2128
2129// Request message for the
2130// [GetPhraseSet][google.cloud.speech.v2.Speech.GetPhraseSet] method.
2131message GetPhraseSetRequest {
2132  // Required. The name of the PhraseSet to retrieve. The expected format is
2133  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
2134  string name = 1 [
2135    (google.api.field_behavior) = REQUIRED,
2136    (google.api.resource_reference) = {
2137      type: "speech.googleapis.com/PhraseSet"
2138    }
2139  ];
2140}
2141
2142// Request message for the
2143// [UpdatePhraseSet][google.cloud.speech.v2.Speech.UpdatePhraseSet] method.
2144message UpdatePhraseSetRequest {
2145  // Required. The PhraseSet to update.
2146  //
2147  // The PhraseSet's `name` field is used to identify the PhraseSet to update.
2148  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
2149  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];
2150
2151  // The list of fields to update. If empty, all non-default valued fields are
2152  // considered for update. Use `*` to update the entire PhraseSet resource.
2153  google.protobuf.FieldMask update_mask = 2;
2154
2155  // If set, validate the request and preview the updated PhraseSet, but do not
2156  // actually update it.
2157  bool validate_only = 4;
2158}
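
// For illustration only: a sketch of an `UpdatePhraseSetRequest` that uses
// the `*` wildcard mask described above to replace the entire PhraseSet (the
// resource name is invented):
//
//   phrase_set {
//     name: "projects/my-project/locations/global/phraseSets/media-commands"
//     phrases { value: "play the next track" }
//   }
//   update_mask { paths: "*" }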
2159
2160// Request message for the
2161// [DeletePhraseSet][google.cloud.speech.v2.Speech.DeletePhraseSet] method.
2162message DeletePhraseSetRequest {
2163  // Required. The name of the PhraseSet to delete.
2164  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
2165  string name = 1 [
2166    (google.api.field_behavior) = REQUIRED,
2167    (google.api.resource_reference) = {
2168      type: "speech.googleapis.com/PhraseSet"
2169    }
2170  ];
2171
2172  // If set, validate the request and preview the deleted PhraseSet, but do not
2173  // actually delete it.
2174  bool validate_only = 2;
2175
2176  // If set to true, and the PhraseSet is not found, the request will succeed
2177  // and be a no-op (no Operation is recorded in this case).
2178  bool allow_missing = 4;
2179
2180  // This checksum is computed by the server based on the value of other
2181  // fields. This may be sent on update, undelete, and delete requests to ensure
2182  // the client has an up-to-date value before proceeding.
2183  string etag = 3;
2184}
2185
2186// Request message for the
2187// [UndeletePhraseSet][google.cloud.speech.v2.Speech.UndeletePhraseSet]
2188// method.
2189message UndeletePhraseSetRequest {
2190  // Required. The name of the PhraseSet to undelete.
2191  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
2192  string name = 1 [
2193    (google.api.field_behavior) = REQUIRED,
2194    (google.api.resource_reference) = {
2195      type: "speech.googleapis.com/PhraseSet"
2196    }
2197  ];
2198
2199  // If set, validate the request and preview the undeleted PhraseSet, but do
2200  // not actually undelete it.
2201  bool validate_only = 3;
2202
2203  // This checksum is computed by the server based on the value of other
2204  // fields. This may be sent on update, undelete, and delete requests to ensure
2205  // the client has an up-to-date value before proceeding.
2206  string etag = 4;
2207}
2208