// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.speech.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option go_package = "cloud.google.com/go/speech/apiv2/speechpb;speechpb";
option java_multiple_files = true;
option java_outer_classname = "CloudSpeechProto";
option java_package = "com.google.cloud.speech.v2";
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKey"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}"
};
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKeyVersion"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}"
};

// Enables speech transcription and resource management.
service Speech {
  option (google.api.default_host) = "speech.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc CreateRecognizer(CreateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/recognizers"
      body: "recognizer"
    };
    option (google.api.method_signature) = "parent,recognizer,recognizer_id";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }
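
  // A minimal usage sketch, assuming the generated Python client
  // (`google-cloud-speech`); the project, location, and recognizer ID below
  // are placeholders:
  //
  //     from google.cloud.speech_v2 import SpeechClient
  //     from google.cloud.speech_v2.types import cloud_speech
  //
  //     client = SpeechClient()
  //     # CreateRecognizer returns a long-running operation; result() blocks
  //     # until the Recognizer is ready.
  //     operation = client.create_recognizer(
  //         parent="projects/my-project/locations/global",
  //         recognizer=cloud_speech.Recognizer(
  //             model="long", language_codes=["en-US"]),
  //         recognizer_id="my-recognizer",
  //     )
  //     recognizer = operation.result()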

  // Lists Recognizers.
  rpc ListRecognizers(ListRecognizersRequest)
      returns (ListRecognizersResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/recognizers"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [Recognizer][google.cloud.speech.v2.Recognizer]. Fails with
  // [NOT_FOUND][google.rpc.Code.NOT_FOUND] if the requested Recognizer doesn't
  // exist.
  rpc GetRecognizer(GetRecognizerRequest) returns (Recognizer) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UpdateRecognizer(UpdateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{recognizer.name=projects/*/locations/*/recognizers/*}"
      body: "recognizer"
    };
    option (google.api.method_signature) = "recognizer,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc DeleteRecognizer(DeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UndeleteRecognizer(UndeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/recognizers/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Performs synchronous Speech recognition: receive results after all audio
  // has been sent and processed.
  rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:recognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,content";
    option (google.api.method_signature) = "recognizer,config,config_mask,uri";
  }
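
  // A minimal usage sketch, continuing the imports from the CreateRecognizer
  // sketch above; the recognizer path and audio file are placeholders:
  //
  //     client = SpeechClient()
  //     with open("audio.wav", "rb") as f:
  //         audio_bytes = f.read()
  //     response = client.recognize(
  //         recognizer="projects/my-project/locations/global/recognizers/my-recognizer",
  //         config=cloud_speech.RecognitionConfig(
  //             auto_decoding_config=cloud_speech.AutoDetectDecodingConfig()),
  //         content=audio_bytes,
  //     )
  //     for result in response.results:
  //         print(result.alternatives[0].transcript)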

  // Performs bidirectional streaming speech recognition: receive results while
  // sending audio. This method is only available via the gRPC API (not REST).
  rpc StreamingRecognize(stream StreamingRecognizeRequest)
      returns (stream StreamingRecognizeResponse) {}
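
  // A minimal usage sketch, continuing the imports above; `recognizer_name`
  // and `chunks` (an iterable of audio byte strings) are placeholders. The
  // first request carries the recognizer and streaming_config; later requests
  // carry only audio:
  //
  //     def requests(recognizer_name, chunks):
  //         yield cloud_speech.StreamingRecognizeRequest(
  //             recognizer=recognizer_name,
  //             streaming_config=cloud_speech.StreamingRecognitionConfig(
  //                 config=cloud_speech.RecognitionConfig(
  //                     auto_decoding_config=cloud_speech.AutoDetectDecodingConfig())))
  //         for chunk in chunks:
  //             yield cloud_speech.StreamingRecognizeRequest(audio=chunk)
  //
  //     for response in client.streaming_recognize(
  //             requests=requests(recognizer_name, chunks)):
  //         for result in response.results:
  //             print(result.is_final, result.alternatives[0].transcript)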

  // Performs batch asynchronous speech recognition: send a request with N
  // audio files and receive a long running operation that can be polled to see
  // when the transcriptions are finished.
  rpc BatchRecognize(BatchRecognizeRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:batchRecognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,files";
    option (google.longrunning.operation_info) = {
      response_type: "BatchRecognizeResponse"
      metadata_type: "OperationMetadata"
    };
  }
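
  // A minimal usage sketch, continuing the imports above; the recognizer path
  // and Cloud Storage URI are placeholders:
  //
  //     operation = client.batch_recognize(
  //         request=cloud_speech.BatchRecognizeRequest(
  //             recognizer="projects/my-project/locations/global/recognizers/my-recognizer",
  //             files=[cloud_speech.BatchRecognizeFileMetadata(
  //                 uri="gs://my-bucket/audio.wav")],
  //             recognition_output_config=cloud_speech.RecognitionOutputConfig(
  //                 inline_response_config=cloud_speech.InlineOutputConfig()),
  //         ))
  //     # Poll the long-running operation until the transcripts are ready.
  //     response = operation.result()
  //     for uri, file_result in response.results.items():
  //         print(uri, file_result.transcript)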

  // Returns the requested [Config][google.cloud.speech.v2.Config].
  rpc GetConfig(GetConfigRequest) returns (Config) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/config}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [Config][google.cloud.speech.v2.Config].
  rpc UpdateConfig(UpdateConfigRequest) returns (Config) {
    option (google.api.http) = {
      patch: "/v2/{config.name=projects/*/locations/*/config}"
      body: "config"
    };
    option (google.api.method_signature) = "config,update_mask";
  }

  // Creates a [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc CreateCustomClass(CreateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/customClasses"
      body: "custom_class"
    };
    option (google.api.method_signature) =
        "parent,custom_class,custom_class_id";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists CustomClasses.
  rpc ListCustomClasses(ListCustomClassesRequest)
      returns (ListCustomClassesResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/customClasses"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc GetCustomClass(GetCustomClassRequest) returns (CustomClass) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UpdateCustomClass(UpdateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{custom_class.name=projects/*/locations/*/customClasses/*}"
      body: "custom_class"
    };
    option (google.api.method_signature) = "custom_class,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc DeleteCustomClass(DeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UndeleteCustomClass(UndeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/customClasses/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Creates a [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc CreatePhraseSet(CreatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/phraseSets"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "parent,phrase_set,phrase_set_id";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists PhraseSets.
  rpc ListPhraseSets(ListPhraseSetsRequest) returns (ListPhraseSetsResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/phraseSets"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc GetPhraseSet(GetPhraseSetRequest) returns (PhraseSet) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UpdatePhraseSet(UpdatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "phrase_set,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc DeletePhraseSet(DeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UndeletePhraseSet(UndeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/phraseSets/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }
}

// Request message for the
// [CreateRecognizer][google.cloud.speech.v2.Speech.CreateRecognizer] method.
message CreateRecognizerRequest {
  // Required. The Recognizer to create.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the Recognizer, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the Recognizer, which will become the final component of
  // the Recognizer's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string recognizer_id = 3;

  // Required. The project and location where this Recognizer will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/Recognizer"
    }
  ];
}

// Represents the metadata of a long-running operation.
message OperationMetadata {
  // The time the operation was created.
  google.protobuf.Timestamp create_time = 1;

  // The time the operation was last updated.
  google.protobuf.Timestamp update_time = 2;

  // The resource path for the target of the operation.
  string resource = 3;

  // The method that triggered the operation.
  string method = 4;

  // The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the content of the Operation is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 6 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKey"
  }];

  // The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the content of the Operation is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 7 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKeyVersion"
  }];

  // The request that spawned the Operation.
  oneof request {
    // The BatchRecognizeRequest that spawned the Operation.
    BatchRecognizeRequest batch_recognize_request = 8;

    // The CreateRecognizerRequest that spawned the Operation.
    CreateRecognizerRequest create_recognizer_request = 9;

    // The UpdateRecognizerRequest that spawned the Operation.
    UpdateRecognizerRequest update_recognizer_request = 10;

    // The DeleteRecognizerRequest that spawned the Operation.
    DeleteRecognizerRequest delete_recognizer_request = 11;

    // The UndeleteRecognizerRequest that spawned the Operation.
    UndeleteRecognizerRequest undelete_recognizer_request = 12;

    // The CreateCustomClassRequest that spawned the Operation.
    CreateCustomClassRequest create_custom_class_request = 13;

    // The UpdateCustomClassRequest that spawned the Operation.
    UpdateCustomClassRequest update_custom_class_request = 14;

    // The DeleteCustomClassRequest that spawned the Operation.
    DeleteCustomClassRequest delete_custom_class_request = 15;

    // The UndeleteCustomClassRequest that spawned the Operation.
    UndeleteCustomClassRequest undelete_custom_class_request = 16;

    // The CreatePhraseSetRequest that spawned the Operation.
    CreatePhraseSetRequest create_phrase_set_request = 17;

    // The UpdatePhraseSetRequest that spawned the Operation.
    UpdatePhraseSetRequest update_phrase_set_request = 18;

    // The DeletePhraseSetRequest that spawned the Operation.
    DeletePhraseSetRequest delete_phrase_set_request = 19;

    // The UndeletePhraseSetRequest that spawned the Operation.
    UndeletePhraseSetRequest undelete_phrase_set_request = 20;

    // The UpdateConfigRequest that spawned the Operation.
    UpdateConfigRequest update_config_request = 21 [deprecated = true];
  }

  // The percent progress of the Operation. Values can range from 0-100. If the
  // value is 100, then the operation is finished.
  int32 progress_percent = 22;

  // Specific metadata per RPC.
  oneof metadata {
    // Metadata specific to the BatchRecognize method.
    BatchRecognizeMetadata batch_recognize_metadata = 23;
  }
}

// Request message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersRequest {
  // Required. The project and location of Recognizers to list. The expected
  // format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of Recognizers to return. The service may return fewer
  // than this value. If unspecified, at most 5 Recognizers will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] must match
  // the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersResponse {
  // The list of requested Recognizers.
  repeated Recognizer recognizers = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListRecognizersRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetRecognizer][google.cloud.speech.v2.Speech.GetRecognizer] method.
message GetRecognizerRequest {
  // Required. The name of the Recognizer to retrieve. The expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];
}

// Request message for the
// [UpdateRecognizer][google.cloud.speech.v2.Speech.UpdateRecognizer] method.
message UpdateRecognizerRequest {
  // Required. The Recognizer to update.
  //
  // The Recognizer's `name` field is used to identify the Recognizer to update.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields are
  // considered for update. Use `*` to update the entire Recognizer resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated Recognizer, but do not
  // actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeleteRecognizer][google.cloud.speech.v2.Speech.DeleteRecognizer] method.
message DeleteRecognizerRequest {
  // Required. The name of the Recognizer to delete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the deleted Recognizer, but do not
  // actually delete it.
  bool validate_only = 2;

  // If set to true, and the Recognizer is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeleteRecognizer][google.cloud.speech.v2.Speech.UndeleteRecognizer]
// method.
message UndeleteRecognizerRequest {
  // Required. The name of the Recognizer to undelete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the undeleted Recognizer, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 4;
}

// A Recognizer message. Stores recognition configuration and metadata.
message Recognizer {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Recognizer"
    pattern: "projects/{project}/locations/{location}/recognizers/{recognizer}"
    style: DECLARATIVE_FRIENDLY
  };

  // Set of states that define the lifecycle of a Recognizer.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The Recognizer is active and ready for use.
    ACTIVE = 2;

    // This Recognizer has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the Recognizer.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the Recognizer.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // User-settable, human-readable name for the Recognizer. Must be 63
  // characters or less.
  string display_name = 3;

  // Required. Which model to use for recognition requests. Select the model
  // best suited to your domain to get best results.
  //
  // Guidance for choosing which model to use can be found in the [Transcription
  // Models
  // Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
  // and the models supported in each region can be found in the [Table Of
  // Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  string model = 4 [(google.api.field_behavior) = REQUIRED];

  // Required. The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  //
  // Supported languages for each model are listed in the [Table of Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  //
  // If additional languages are provided, the recognition result will contain
  // recognition in the most likely language detected. The recognition result
  // will include the language tag of the language detected in the audio.
  // When you create or update a Recognizer, these values are
  // stored in normalized BCP-47 form. For example, "en-us" is stored as
  // "en-US".
  repeated string language_codes = 17 [(google.api.field_behavior) = REQUIRED];

  // Default configuration to use for requests with this Recognizer.
  // This can be overwritten by inline configuration in the
  // [RecognizeRequest.config][google.cloud.speech.v2.RecognizeRequest.config]
  // field.
  RecognitionConfig default_recognition_config = 6;

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must be 63 characters or less each.
  // At most 100 annotations.
  map<string, string> annotations = 7;

  // Output only. The Recognizer lifecycle state.
  State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this Recognizer was modified.
  google.protobuf.Timestamp update_time = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer was requested for deletion.
  google.protobuf.Timestamp delete_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer will be purged.
  google.protobuf.Timestamp expire_time = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this Recognizer is in the process of being
  // updated.
  bool reconciling = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 15 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 16 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}
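
// A minimal construction sketch, assuming the `cloud_speech` types module
// imported in the sketches above (proto fields map 1:1 to keyword arguments;
// all values are placeholders):
//
//     recognizer = cloud_speech.Recognizer(
//         model="long",              # required
//         language_codes=["en-US"],  # required, stored in normalized BCP-47
//         display_name="My recognizer",
//         annotations={"team": "speech"},
//         default_recognition_config=cloud_speech.RecognitionConfig(
//             auto_decoding_config=cloud_speech.AutoDetectDecodingConfig()),
//     )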

// Automatically detected decoding parameters.
// Supported for the following encodings:
//
// * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
//
// * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
//
// * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
//
// * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
//
// * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
//
// * FLAC: FLAC frames in the "native FLAC" container format.
//
// * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
//
// * OGG_OPUS: Opus audio frames in an Ogg container.
//
// * WEBM_OPUS: Opus audio frames in a WebM container.
message AutoDetectDecodingConfig {}

// Explicitly specified decoding parameters.
message ExplicitDecodingConfig {
  // Supported audio data encodings.
  enum AudioEncoding {
    // Default value. This value is unused.
    AUDIO_ENCODING_UNSPECIFIED = 0;

    // Headerless 16-bit signed little-endian PCM samples.
    LINEAR16 = 1;

    // Headerless 8-bit companded mulaw samples.
    MULAW = 2;

    // Headerless 8-bit companded alaw samples.
    ALAW = 3;
  }

  // Required. Encoding of the audio data sent for recognition.
  AudioEncoding encoding = 1 [(google.api.field_behavior) = REQUIRED];

  // Sample rate in Hertz of the audio data sent for recognition. Valid
  // values are: 8000-48000. 16000 is optimal. For best results, set the
  // sampling rate of the audio source to 16000 Hz. If that's not possible, use
  // the native sample rate of the audio source (instead of re-sampling).
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  int32 sample_rate_hertz = 2;

  // Number of channels present in the audio data sent for recognition.
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  //
  // The maximum allowed value is 8.
  int32 audio_channel_count = 3;
}
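
// A minimal sketch for headerless PCM input, assuming the `cloud_speech`
// types module from the sketches above (sample values are placeholders):
//
//     config = cloud_speech.RecognitionConfig(
//         explicit_decoding_config=cloud_speech.ExplicitDecodingConfig(
//             encoding=cloud_speech.ExplicitDecodingConfig.AudioEncoding.LINEAR16,
//             sample_rate_hertz=16000,  # valid values are 8000-48000
//             audio_channel_count=1,    # at most 8
//         ))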

// Configuration to enable speaker diarization.
message SpeakerDiarizationConfig {
  // Required. Minimum number of speakers in the conversation. This range gives
  // you more flexibility by allowing the system to automatically determine the
  // correct number of speakers.
  //
  // To fix the number of speakers detected in the audio, set
  // `min_speaker_count` = `max_speaker_count`.
  int32 min_speaker_count = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Maximum number of speakers in the conversation. Valid values are:
  // 1-6. Must be >= `min_speaker_count`. This range gives you more flexibility
  // by allowing the system to automatically determine the correct number of
  // speakers.
  int32 max_speaker_count = 3 [(google.api.field_behavior) = REQUIRED];
}
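
// A minimal sketch, assuming the `cloud_speech` types module from the
// sketches above; to pin the number of speakers instead of giving a range,
// set both bounds to the same value:
//
//     diarization = cloud_speech.SpeakerDiarizationConfig(
//         min_speaker_count=2,
//         max_speaker_count=4,  # valid values are 1-6
//     )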

// Available recognition features.
message RecognitionFeatures {
  // Options for how to recognize multi-channel audio.
  enum MultiChannelMode {
    // Default value for the multi-channel mode. If the audio contains
    // multiple channels, only the first channel will be transcribed; other
    // channels will be ignored.
    MULTI_CHANNEL_MODE_UNSPECIFIED = 0;

    // If selected, each channel in the provided audio is transcribed
    // independently. This cannot be selected if the selected
    // [model][google.cloud.speech.v2.Recognizer.model] is `latest_short`.
    SEPARATE_RECOGNITION_PER_CHANNEL = 1;
  }

  // If set to `true`, the server will attempt to filter out profanities,
  // replacing all but the initial character in each filtered word with
  // asterisks, for instance, "f***". If set to `false` or omitted, profanities
  // won't be filtered out.
  bool profanity_filter = 1;

  // If `true`, the top result includes a list of words and the start and end
  // time offsets (timestamps) for those words. If `false`, no word-level time
  // offset information is returned. The default is `false`.
  bool enable_word_time_offsets = 2;

  // If `true`, the top result includes a list of words and the confidence for
  // those words. If `false`, no word-level confidence information is returned.
  // The default is `false`.
  bool enable_word_confidence = 3;

  // If `true`, adds punctuation to recognition result hypotheses. This feature
  // is only available in select languages. The default `false` value does not
  // add punctuation to result hypotheses.
  bool enable_automatic_punctuation = 4;

  // The spoken punctuation behavior for the call. If `true`, replaces spoken
  // punctuation with the corresponding symbols in the request. For example,
  // "how are you question mark" becomes "how are you?". See
  // https://cloud.google.com/speech-to-text/docs/spoken-punctuation for
  // support. If `false`, spoken punctuation is not replaced.
  bool enable_spoken_punctuation = 14;

  // The spoken emoji behavior for the call. If `true`, adds spoken emoji
  // formatting for the request. This will replace spoken emojis with the
  // corresponding Unicode symbols in the final transcript. If `false`, spoken
  // emojis are not replaced.
  bool enable_spoken_emojis = 15;

  // Mode for recognizing multi-channel audio.
  MultiChannelMode multi_channel_mode = 17;

  // Configuration to enable speaker diarization and set additional
  // parameters to make diarization better suited for your application.
  // When this is enabled, we send all the words from the beginning of the
  // audio for the top alternative in every consecutive STREAMING response.
  // This is done in order to improve our speaker tags as our models learn to
  // identify the speakers in the conversation over time.
  // For non-streaming requests, the diarization results will be provided only
  // in the top alternative of the FINAL SpeechRecognitionResult.
  SpeakerDiarizationConfig diarization_config = 9;

  // Maximum number of recognition hypotheses to be returned.
  // The server may return fewer than `max_alternatives`.
  // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
  // one. If omitted, will return a maximum of one.
  int32 max_alternatives = 16;
}
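
// A minimal sketch combining several features, assuming the `cloud_speech`
// types module from the sketches above:
//
//     features = cloud_speech.RecognitionFeatures(
//         enable_automatic_punctuation=True,
//         enable_word_time_offsets=True,
//         enable_word_confidence=True,
//         diarization_config=cloud_speech.SpeakerDiarizationConfig(
//             min_speaker_count=1, max_speaker_count=3),
//         max_alternatives=3,
//     )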

// Provides "hints" to the speech recognizer to favor specific words and phrases
// in the results. PhraseSets can be specified either as an inline resource or
// as a reference to an existing PhraseSet resource.
message SpeechAdaptation {
  // A biasing PhraseSet, which can be either a string referencing the name of
  // an existing PhraseSet resource, or an inline definition of a PhraseSet.
  message AdaptationPhraseSet {
    oneof value {
      // The name of an existing PhraseSet resource. The user must have read
      // access to the resource and it must not be deleted.
      string phrase_set = 1 [(google.api.resource_reference) = {
        type: "speech.googleapis.com/PhraseSet"
      }];

      // An inline defined PhraseSet.
      PhraseSet inline_phrase_set = 2;
    }
  }

  // A list of inline or referenced PhraseSets.
  repeated AdaptationPhraseSet phrase_sets = 1;

  // A list of inline CustomClasses. Existing CustomClass resources can be
  // referenced directly in a PhraseSet.
  repeated CustomClass custom_classes = 2;
}
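
// A minimal sketch, assuming the `cloud_speech` types module from the
// sketches above; the resource name is a placeholder, and the
// `phrases`/`boost` fields follow the PhraseSet message defined elsewhere in
// this package:
//
//     adaptation = cloud_speech.SpeechAdaptation(
//         phrase_sets=[
//             # Reference an existing PhraseSet resource by name...
//             cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
//                 phrase_set="projects/my-project/locations/global/phraseSets/my-phrase-set"),
//             # ...or define one inline.
//             cloud_speech.SpeechAdaptation.AdaptationPhraseSet(
//                 inline_phrase_set=cloud_speech.PhraseSet(
//                     phrases=[cloud_speech.PhraseSet.Phrase(
//                         value="Speech-to-Text", boost=10.0)])),
//         ])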

// Provides information to the Recognizer that specifies how to process the
// recognition request.
message RecognitionConfig {
  // Decoding parameters for audio being sent for recognition.
  oneof decoding_config {
    // Automatically detect decoding parameters.
    // Preferred for supported formats.
    AutoDetectDecodingConfig auto_decoding_config = 7;

    // Explicitly specified decoding parameters.
    // Required if using headerless PCM audio (linear16, mulaw, alaw).
    ExplicitDecodingConfig explicit_decoding_config = 8;
  }

  // Speech recognition features to enable.
  RecognitionFeatures features = 2;

  // Speech adaptation context that weights recognizer predictions for specific
  // words and phrases.
  SpeechAdaptation adaptation = 6;
}

// Request message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method. Either
// `content` or `uri` must be supplied. Supplying both or neither returns
// [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See [content
// limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognizeRequest {
  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.RecognizeRequest.config_mask] field
  // can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1;

  // The list of fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] that override the
  // values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.RecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 8;

  // The audio source, which is either inline content or a Google Cloud
  // Storage URI.
  oneof audio_source {
    // The audio data bytes encoded as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. As
    // with all bytes fields, proto buffers use a pure binary representation,
    // whereas JSON representations use base64.
    bytes content = 5;

    // URI that points to a file that contains audio data bytes as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. The file
    // must not be compressed (for example, gzip). Currently, only Google Cloud
    // Storage URIs are supported, which must be specified in the following
    // format: `gs://bucket_name/object_name` (other URI formats return
    // [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more
    // information, see [Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 6;
  }
}
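
// A minimal sketch of overriding a single field of the recognizer's stored
// default configuration via `config_mask`, continuing the imports above; the
// resource names are placeholders:
//
//     from google.protobuf import field_mask_pb2
//
//     response = client.recognize(
//         recognizer="projects/my-project/locations/global/recognizers/my-recognizer",
//         config=cloud_speech.RecognitionConfig(
//             features=cloud_speech.RecognitionFeatures(
//                 enable_word_time_offsets=True)),
//         # Only `features` overrides the stored default; all other fields
//         # are taken from the recognizer.
//         config_mask=field_mask_pb2.FieldMask(paths=["features"]),
//         uri="gs://my-bucket/audio.wav",
//     )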

// Metadata about the recognition request and response.
message RecognitionResponseMetadata {
  // When available, billed audio seconds for the corresponding request.
  google.protobuf.Duration total_billed_duration = 6;
}

// Alternative hypotheses (a.k.a. n-best list).
message SpeechRecognitionAlternative {
  // Transcript text representing the words that the user spoke.
  string transcript = 1;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result, or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users should
  // not rely on it to be always provided. The default of 0.0 is a sentinel
  // value indicating `confidence` was not set.
  float confidence = 2;

  // A list of word-specific information for each recognized word.
  // When the
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is set, you will see all the words from the beginning of the audio.
  repeated WordInfo words = 3;
}

// Word-specific information for recognized words.
message WordInfo {
  // Time offset relative to the beginning of the audio,
  // and corresponding to the start of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration start_offset = 1;

  // Time offset relative to the beginning of the audio,
  // and corresponding to the end of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration end_offset = 2;

  // The word corresponding to this set of information.
  string word = 3;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result, or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users should
  // not rely on it to be always provided. The default of 0.0 is a sentinel
  // value indicating `confidence` was not set.
  float confidence = 4;

  // A distinct label is assigned for every speaker within the audio. This field
  // specifies which one of those speakers was detected to have spoken this
  // word. `speaker_label` is set if
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is given and only in the top alternative.
  string speaker_label = 6;
}
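
// A minimal sketch of reading word-level output in Python, assuming
// `enable_word_time_offsets` (and optionally diarization) were enabled on the
// request that produced `response`:
//
//     for result in response.results:
//         for word in result.alternatives[0].words:
//             print(word.word, word.start_offset, word.end_offset,
//                   word.speaker_label)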

// A speech recognition result corresponding to a portion of the audio.
message SpeechRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel.
  // For `audio_channel_count` = `N`, its output values can range from `1` to
  // `N`.
  int32 channel_tag = 2;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected to have the most likelihood of being spoken in the audio.
  string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Response message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method.
message RecognizeResponse {
  // Sequential list of transcription results corresponding to sequential
  // portions of audio.
  repeated SpeechRecognitionResult results = 3;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 2;
}

// Available recognition features specific to streaming recognition requests.
message StreamingRecognitionFeatures {
  // Events that a timeout can be set on for voice activity.
  message VoiceActivityTimeout {
    // Duration to time out the stream if no speech begins. If this is set and
    // no speech is detected in this duration at the start of the stream, the
    // server will close the stream.
    google.protobuf.Duration speech_start_timeout = 1;

    // Duration to time out the stream after speech ends. If this is set and no
    // speech is detected in this duration after speech was detected, the server
    // will close the stream.
    google.protobuf.Duration speech_end_timeout = 2;
  }

  // If `true`, responses with voice activity speech events will be returned as
  // they are detected.
  bool enable_voice_activity_events = 1;

  // Whether or not to stream interim results to the client. If set to true,
  // interim results will be streamed to the client. Otherwise, only the final
  // response will be streamed back.
  bool interim_results = 2;

  // If set, the server will automatically close the stream after the specified
  // duration has elapsed after the last VOICE_ACTIVITY speech event has been
  // sent. The field `enable_voice_activity_events` must also be set to true.
  VoiceActivityTimeout voice_activity_timeout = 3;
}

// Provides configuration information for the StreamingRecognize request.
message StreamingRecognitionConfig {
  // Required. Features and audio metadata to use for the Automatic Speech
  // Recognition. This field in combination with the
  // [config_mask][google.cloud.speech.v2.StreamingRecognitionConfig.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] that
  // override the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] override
  // the values in the Recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // Recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config]
  // completely overrides and replaces the config in the recognizer for this
  // recognition request.
  google.protobuf.FieldMask config_mask = 3;

  // Speech recognition features to enable specific to streaming audio
  // recognition requests.
  StreamingRecognitionFeatures streaming_features = 2;
}

// Request message for the
// [StreamingRecognize][google.cloud.speech.v2.Speech.StreamingRecognize]
// method. Multiple
// [StreamingRecognizeRequest][google.cloud.speech.v2.StreamingRecognizeRequest]
// messages are sent. The first message must contain a
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer] and
// optionally a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message and must not contain
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio]. All
// subsequent messages must contain
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] and must not
// contain a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message.
message StreamingRecognizeRequest {
  // Required. Streaming recognition should start with an initial request having
  // a `recognizer`. Subsequent requests carry the audio data to be recognized.
  //
  // The initial request with configuration can be omitted if the Recognizer
  // being used has a
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config].
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  oneof streaming_request {
    // StreamingRecognitionConfig to be used in this recognition attempt.
    // If provided, it will override the default RecognitionConfig stored in the
    // Recognizer.
    StreamingRecognitionConfig streaming_config = 6;

    // Inline audio bytes to be Recognized.
    // Maximum size for this field is 15 KB per request.
    bytes audio = 5;
  }
}
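
// A minimal sketch of the required message ordering, assuming the
// `cloud_speech` types module from the sketches above; `recognizer_name` and
// `chunk` are placeholders. The configuration-only request goes first,
// followed by audio-only requests of at most 15 KB each:
//
//     config_request = cloud_speech.StreamingRecognizeRequest(
//         recognizer=recognizer_name,
//         streaming_config=cloud_speech.StreamingRecognitionConfig(
//             config=cloud_speech.RecognitionConfig(
//                 auto_decoding_config=cloud_speech.AutoDetectDecodingConfig()),
//             streaming_features=cloud_speech.StreamingRecognitionFeatures(
//                 interim_results=True)))
//     audio_request = cloud_speech.StreamingRecognizeRequest(audio=chunk)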

// Request message for the
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
// method.
message BatchRecognizeRequest {
  // Possible processing strategies for batch requests.
  enum ProcessingStrategy {
    // Default value for the processing strategy. The request is processed as
    // soon as it is received.
    PROCESSING_STRATEGY_UNSPECIFIED = 0;

    // If selected, processes the request during lower utilization periods for a
    // price discount. The request is fulfilled within 24 hours.
    DYNAMIC_BATCHING = 1;
  }

  // Required. Resource name of the recognizer to be used for ASR.
  string recognizer = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.BatchRecognizeRequest.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 4;

  // The list of fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] that override
  // the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all given fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 5;

  // Audio files with file metadata for ASR.
  // The maximum number of files allowed to be specified is 5.
  repeated BatchRecognizeFileMetadata files = 3;

  // Configuration options for where to output the transcripts of each file.
  RecognitionOutputConfig recognition_output_config = 6;

  // Processing strategy to use for this request.
  ProcessingStrategy processing_strategy = 7;
}
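
// A minimal sketch that writes transcripts to Cloud Storage and opts into
// dynamic batching, assuming the `cloud_speech` types module from the
// sketches above; the URIs and recognizer name are placeholders:
//
//     request = cloud_speech.BatchRecognizeRequest(
//         recognizer=recognizer_name,
//         files=[cloud_speech.BatchRecognizeFileMetadata(
//             uri="gs://my-bucket/audio.wav")],
//         recognition_output_config=cloud_speech.RecognitionOutputConfig(
//             gcs_output_config=cloud_speech.GcsOutputConfig(
//                 uri="gs://my-bucket/transcripts")),
//         processing_strategy=(cloud_speech.BatchRecognizeRequest.
//             ProcessingStrategy.DYNAMIC_BATCHING),
//     )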

// Output configurations for Cloud Storage.
message GcsOutputConfig {
  // The Cloud Storage URI prefix under which recognition results will be
  // written.
  string uri = 1;
}

// Output configurations for inline response.
message InlineOutputConfig {}

// Configuration options for the output(s) of recognition.
message RecognitionOutputConfig {
  oneof output {
    // If this message is populated, recognition results are written to the
    // provided Google Cloud Storage URI.
    GcsOutputConfig gcs_output_config = 1;

    // If this message is populated, recognition results are provided in the
    // [BatchRecognizeResponse][google.cloud.speech.v2.BatchRecognizeResponse]
    // message of the Operation when completed. This is only supported when
    // calling [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
    // with just one audio file.
    InlineOutputConfig inline_response_config = 2;
  }
}

// Response message for
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize] that is
// packaged into a longrunning [Operation][google.longrunning.Operation].
message BatchRecognizeResponse {
  // Map from filename to the final result for that file.
  map<string, BatchRecognizeFileResult> results = 1;

  // When available, billed audio seconds for the corresponding request.
  google.protobuf.Duration total_billed_duration = 2;
}

// Output type for Cloud Storage of BatchRecognize transcripts. Though this
// proto isn't returned in this API anywhere, the Cloud Storage transcripts will
// be this proto serialized and should be parsed as such.
message BatchRecognizeResults {
  // Sequential list of transcription results corresponding to sequential
  // portions of audio.
  repeated SpeechRecognitionResult results = 1;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 2;
}

// Final results for a single file.
message BatchRecognizeFileResult {
  // The Cloud Storage URI to which recognition results were written.
  string uri = 1;

  // Error if one was encountered.
  google.rpc.Status error = 2;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 3;

  // The transcript for the audio file. This is populated only when
  // [InlineOutputConfig][google.cloud.speech.v2.InlineOutputConfig] is set in
  // the
  // [RecognitionOutputConfig][google.cloud.speech.v2.RecognitionOutputConfig].
1260  BatchRecognizeResults transcript = 4;
1261}

// Metadata about transcription for a single file (for example, progress
// percent).
message BatchRecognizeTranscriptionMetadata {
  // How much of the file has been transcribed so far.
  int32 progress_percent = 1;

  // Error if one was encountered.
  google.rpc.Status error = 2;

  // The Cloud Storage URI to which recognition results will be written.
  string uri = 3;
}

// Operation metadata for
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize].
message BatchRecognizeMetadata {
  // Map from provided filename to the transcription metadata for that file.
  map<string, BatchRecognizeTranscriptionMetadata> transcription_metadata = 1;
}

// Metadata about a single file in a batch for BatchRecognize.
message BatchRecognizeFileMetadata {
  // The audio source, which is a Google Cloud Storage URI.
  oneof audio_source {
    // Cloud Storage URI for the audio file.
    string uri = 1;
  }

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.BatchRecognizeFileMetadata.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource as well as the
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] at the
  // request level.
  RecognitionConfig config = 4;

  // The list of fields in
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] that
  // override the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] override
  // the values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config]
  // completely overrides and replaces the config in the recognizer for this
  // recognition request.
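  //
  // For illustration only, a hypothetical per-file override rendered as
  // textproto. The bucket, file, and specific feature path shown are
  // placeholder assumptions, not prescriptions:
  //
  //   uri: "gs://my-bucket/audio.wav"
  //   config { features { enable_word_time_offsets: true } }
  //   config_mask { paths: "features.enable_word_time_offsets" }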
  google.protobuf.FieldMask config_mask = 5;
}

// A streaming speech recognition result corresponding to a portion of the
// audio that is currently being processed.
message StreamingRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // If `false`, this
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult]
  // represents an interim result that may change. If `true`, this is the final
  // time the speech service will return this particular
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult],
  // and the recognizer will not return any further hypotheses for this portion
  // of the transcript and the corresponding audio.
  bool is_final = 2;

  // An estimate of the likelihood that the recognizer will not change its
  // guess about this interim result. Values range from 0.0 (completely
  // unstable) to 1.0 (completely stable). This field is only provided for
  // interim results
  // ([is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`).
  // The default of 0.0 is a sentinel value indicating `stability` was not set.
  float stability = 3;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel. For
  // `audio_channel_count` = `N`, its output values can range from `1` to `N`.
  int32 channel_tag = 5;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected as the most likely language being spoken in the audio.
  string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
// `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
// messages is streamed back to the client. If there is no recognizable
// audio, then no messages are streamed back to the client.
//
// Here are some examples of `StreamingRecognizeResponse`s that might
// be returned while processing audio:
//
// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
//
// 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
//
// 3. results { alternatives { transcript: "to be" } stability: 0.9 }
//    results { alternatives { transcript: " or not to be" } stability: 0.01 }
//
// 4. results { alternatives { transcript: "to be or not to be"
//                             confidence: 0.92 }
//              alternatives { transcript: "to bee or not to bee" }
//              is_final: true }
//
// 5. results { alternatives { transcript: " that's" } stability: 0.01 }
//
// 6. results { alternatives { transcript: " that is" } stability: 0.9 }
//    results { alternatives { transcript: " the question" } stability: 0.01 }
//
// 7. results { alternatives { transcript: " that is the question"
//                             confidence: 0.98 }
//              alternatives { transcript: " that was the question" }
//              is_final: true }
//
// Notes:
//
// - Only two of the above responses (#4 and #7) contain final results; they
//   are indicated by `is_final: true`. Concatenating these together generates
//   the full transcript: "to be or not to be that is the question".
//
// - The others contain interim `results`. #3 and #6 contain two interim
//   `results`: the first portion has a high stability and is less likely to
//   change; the second portion has a low stability and is very likely to
//   change. A UI designer might choose to show only high-stability `results`.
//
// - The specific `stability` and `confidence` values shown above are only for
//   illustrative purposes. Actual values may vary.
//
// - In each response, only one of these fields will be set:
//     `speech_event_type`, or
//     one or more (repeated) `results`.
message StreamingRecognizeResponse {
  // Indicates the type of speech event.
  enum SpeechEventType {
    // No speech event specified.
    SPEECH_EVENT_TYPE_UNSPECIFIED = 0;

    // This event indicates that the server has detected the end of the user's
    // speech utterance and expects no additional speech. Therefore, the server
    // will not process additional audio and will close the gRPC bidirectional
    // stream. This event is only sent if there was a force cutoff due to
    // silence being detected early. This event is only available through the
    // `latest_short` [model][google.cloud.speech.v2.Recognizer.model].
    END_OF_SINGLE_UTTERANCE = 1;

    // This event indicates that the server has detected the beginning of human
    // voice activity in the stream. This event can be returned multiple times
    // if speech starts and stops repeatedly throughout the stream. This event
    // is only sent if `voice_activity_events` is set to true.
    SPEECH_ACTIVITY_BEGIN = 2;

    // This event indicates that the server has detected the end of human voice
    // activity in the stream. This event can be returned multiple times if
    // speech starts and stops repeatedly throughout the stream. This event is
    // only sent if `voice_activity_events` is set to true.
    SPEECH_ACTIVITY_END = 3;
  }

  // This repeated list contains zero or more results that
  // correspond to consecutive portions of the audio currently being processed.
  // It contains zero or one
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`true`
  // result (the newly settled portion), followed by zero or more
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`
  // results (the interim results).
  repeated StreamingRecognitionResult results = 6;

  // Indicates the type of speech event.
  SpeechEventType speech_event_type = 3;

  // Time offset between the beginning of the audio and event emission.
  google.protobuf.Duration speech_event_offset = 7;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 5;
}

// Message representing the config for the Speech-to-Text API. This includes an
// optional [KMS key](https://cloud.google.com/kms/docs/resource-hierarchy#keys)
// with which incoming data will be encrypted.
message Config {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Config"
    pattern: "projects/{project}/locations/{location}/config"
  };

  // Output only. The name of the config resource. There is exactly one config
  // resource per project per location. The expected format is
  // `projects/{project}/locations/{location}/config`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. An optional [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) that, if
  // present, will be used to encrypt Speech-to-Text resources at rest. Updating
  // this key will not re-encrypt existing resources; only new resources will
  // be encrypted using this key. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for the
// [GetConfig][google.cloud.speech.v2.Speech.GetConfig] method.
message GetConfigRequest {
  // Required. The name of the config to retrieve. There is exactly one config
  // resource per project per location. The expected format is
  // `projects/{project}/locations/{location}/config`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "speech.googleapis.com/Config" }
  ];
}

// Request message for the
// [UpdateConfig][google.cloud.speech.v2.Speech.UpdateConfig] method.
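//
// For illustration only, a sketch of this request rendered as textproto. The
// project, location, key ring, and key names are placeholders:
//
//   config {
//     name: "projects/my-project/locations/global/config"
//     kms_key_name: "projects/my-project/locations/global/keyRings/my-ring/cryptoKeys/my-key"
//   }
//   update_mask { paths: "kms_key_name" }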
message UpdateConfigRequest {
  // Required. The config to update.
  //
  // The config's `name` field is used to identify the config to be updated.
  // The expected format is `projects/{project}/locations/{location}/config`.
  Config config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated.
  google.protobuf.FieldMask update_mask = 2;
}

// CustomClass for biasing in speech recognition. Used to define a set of words
// or phrases that represents a common concept or theme likely to appear in
// your audio, for example a list of passenger ship names.
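//
// For illustration only, a hypothetical CustomClass rendered as textproto
// (the display name and items are placeholders):
//
//   display_name: "passenger-ships"
//   items { value: "Queen Mary" }
//   items { value: "Queen Elizabeth 2" }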
message CustomClass {
  option (google.api.resource) = {
    type: "speech.googleapis.com/CustomClass"
    pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
    style: DECLARATIVE_FRIENDLY
  };

  // An item of the class.
  message ClassItem {
    // The class item's value.
    string value = 1;
  }

  // Set of states that define the lifecycle of a CustomClass.
  enum State {
    // Unspecified state. This is only useful for distinguishing unset values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This CustomClass has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the CustomClass.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the CustomClass.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // User-settable, human-readable name for the CustomClass. Must be 63
  // characters or less.
  string display_name = 4;

  // A collection of class items.
  repeated ClassItem items = 5;

  // Output only. The CustomClass lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must each be 63 characters or less.
  // At most 100 annotations are allowed.
  map<string, string> annotations = 10;

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this CustomClass is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}

// PhraseSet for biasing in speech recognition. A PhraseSet is used to provide
// "hints" to the speech recognizer to favor specific words and phrases in the
// results.
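//
// For illustration only, a hypothetical PhraseSet rendered as textproto (the
// phrases and boost values are placeholders):
//
//   phrases { value: "weather forecast" boost: 10 }
//   phrases { value: "weather report" }
//   boost: 5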
message PhraseSet {
  option (google.api.resource) = {
    type: "speech.googleapis.com/PhraseSet"
    pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
    style: DECLARATIVE_FRIENDLY
  };

  // A Phrase contains words and phrase "hints" so that the speech recognition
  // is more likely to recognize them. This can be used to improve the accuracy
  // for specific words and phrases, for example, if specific commands are
  // typically spoken by the user. This can also be used to add additional words
  // to the vocabulary of the recognizer.
  //
  // List items can also include CustomClass references containing groups of
  // words that represent common concepts that occur in natural language.
  message Phrase {
    // The phrase itself.
    string value = 1;

    // Hint Boost. Overrides the boost set at the phrase set level.
    // A positive value increases the probability that a specific phrase will
    // be recognized over other similar sounding phrases. The higher the boost,
    // the higher the chance of false positive recognition as well. Negative
    // boost values would correspond to anti-biasing, but anti-biasing is not
    // enabled, so negative boost values will return an error. Boost values must
    // be between 0 and 20. Any values outside that range will return an error.
    // We recommend using a binary search approach to find the optimal value
    // for your use case, and adding phrases both with and without boost to
    // your requests.
    float boost = 2;
  }

  // Set of states that define the lifecycle of a PhraseSet.
  enum State {
    // Unspecified state. This is only useful for distinguishing unset values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This PhraseSet has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the PhraseSet.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the PhraseSet.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // A list of words and phrases.
  repeated Phrase phrases = 3;

  // Hint Boost. A positive value increases the probability that a specific
  // phrase will be recognized over other similar sounding phrases. The higher
  // the boost, the higher the chance of false positive recognition as well.
  // Valid `boost` values are between 0 (exclusive) and 20. We recommend using a
  // binary search approach to find the optimal value for your use case, and
  // adding phrases both with and without boost to your requests.
  float boost = 4;

  // User-settable, human-readable name for the PhraseSet. Must be 63
  // characters or less.
  string display_name = 5;

  // Output only. The PhraseSet lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must each be 63 characters or less.
  // At most 100 annotations are allowed.
  map<string, string> annotations = 10;

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this PhraseSet is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}

// Request message for the
// [CreateCustomClass][google.cloud.speech.v2.Speech.CreateCustomClass] method.
message CreateCustomClassRequest {
  // Required. The CustomClass to create.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the CustomClass, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the CustomClass, which will become the final component
  // of the CustomClass's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string custom_class_id = 3;

  // Required. The project and location where this CustomClass will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/CustomClass"
    }
  ];
}

// Request message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] method.
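//
// For illustration only, a sketch of a follow-up page request rendered as
// textproto (the parent and token are placeholders):
//
//   parent: "projects/my-project/locations/global"
//   page_size: 20
//   page_token: "<next_page_token from the previous response>"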
message ListCustomClassesRequest {
  // Required. The project and location of CustomClass resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Number of results per request. A valid page_size ranges from 0 to 100
  // inclusive. If the page_size is zero or unspecified, a page size of 5 will
  // be chosen. If the page size exceeds 100, it will be coerced down to 100.
  // Note that a call might return fewer results than the requested page size.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] must
  // match the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses] method.
message ListCustomClassesResponse {
  // The list of requested CustomClasses.
  repeated CustomClass custom_classes = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListCustomClassesRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetCustomClass][google.cloud.speech.v2.Speech.GetCustomClass] method.
message GetCustomClassRequest {
  // Required. The name of the CustomClass to retrieve. The expected format is
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];
}

// Request message for the
// [UpdateCustomClass][google.cloud.speech.v2.Speech.UpdateCustomClass] method.
message UpdateCustomClassRequest {
  // Required. The CustomClass to update.
  //
  // The CustomClass's `name` field is used to identify the CustomClass to
  // update. Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated. If empty, all fields are considered for
  // update.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated CustomClass, but do
  // not actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeleteCustomClass][google.cloud.speech.v2.Speech.DeleteCustomClass] method.
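//
// For illustration only, a sketch of this request rendered as textproto (the
// resource name and etag are placeholders):
//
//   name: "projects/my-project/locations/global/customClasses/my-class"
//   etag: "<etag from a previous read of the resource>"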
message DeleteCustomClassRequest {
  // Required. The name of the CustomClass to delete.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the deleted CustomClass, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the CustomClass is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeleteCustomClass][google.cloud.speech.v2.Speech.UndeleteCustomClass]
// method.
message UndeleteCustomClassRequest {
  // Required. The name of the CustomClass to undelete.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the undeleted CustomClass, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 4;
}

// Request message for the
// [CreatePhraseSet][google.cloud.speech.v2.Speech.CreatePhraseSet] method.
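//
// For illustration only, a sketch of this request rendered as textproto (the
// parent, ID, and phrase are placeholders):
//
//   parent: "projects/my-project/locations/global"
//   phrase_set_id: "my-phrase-set"
//   phrase_set {
//     phrases { value: "weather forecast" boost: 10 }
//   }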
message CreatePhraseSetRequest {
  // Required. The PhraseSet to create.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the PhraseSet, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the PhraseSet, which will become the final component of
  // the PhraseSet's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string phrase_set_id = 3;

  // Required. The project and location where this PhraseSet will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/PhraseSet"
    }
  ];
}

// Request message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsRequest {
  // Required. The project and location of PhraseSet resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of PhraseSets to return. The service may return fewer
  // than this value. If unspecified, at most 5 PhraseSets will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] must match
  // the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsResponse {
  // The list of requested PhraseSets.
  repeated PhraseSet phrase_sets = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListPhraseSetsRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetPhraseSet][google.cloud.speech.v2.Speech.GetPhraseSet] method.
message GetPhraseSetRequest {
  // Required. The name of the PhraseSet to retrieve. The expected format is
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];
}

// Request message for the
// [UpdatePhraseSet][google.cloud.speech.v2.Speech.UpdatePhraseSet] method.
message UpdatePhraseSetRequest {
  // Required. The PhraseSet to update.
  //
  // The PhraseSet's `name` field is used to identify the PhraseSet to update.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields are
  // considered for update. Use `*` to update the entire PhraseSet resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated PhraseSet, but do not
  // actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeletePhraseSet][google.cloud.speech.v2.Speech.DeletePhraseSet] method.
message DeletePhraseSetRequest {
  // Required. The name of the PhraseSet to delete.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the deleted PhraseSet, but do not
  // actually delete it.
  bool validate_only = 2;

  // If set to true, and the PhraseSet is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeletePhraseSet][google.cloud.speech.v2.Speech.UndeletePhraseSet]
// method.
message UndeletePhraseSetRequest {
  // Required. The name of the PhraseSet to undelete.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the undeleted PhraseSet, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to ensure
  // the client has an up-to-date value before proceeding.
  string etag = 4;
}