// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.speech.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option go_package = "cloud.google.com/go/speech/apiv2/speechpb;speechpb";
option java_multiple_files = true;
option java_outer_classname = "CloudSpeechProto";
option java_package = "com.google.cloud.speech.v2";
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKey"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}"
};
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKeyVersion"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}"
};

// Enables speech transcription and resource management.
service Speech {
  option (google.api.default_host) = "speech.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc CreateRecognizer(CreateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/recognizers"
      body: "recognizer"
    };
    option (google.api.method_signature) = "parent,recognizer,recognizer_id";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists Recognizers.
  rpc ListRecognizers(ListRecognizersRequest)
      returns (ListRecognizersResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/recognizers"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [Recognizer][google.cloud.speech.v2.Recognizer]. Fails with
  // [NOT_FOUND][google.rpc.Code.NOT_FOUND] if the requested Recognizer doesn't
  // exist.
  rpc GetRecognizer(GetRecognizerRequest) returns (Recognizer) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UpdateRecognizer(UpdateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{recognizer.name=projects/*/locations/*/recognizers/*}"
      body: "recognizer"
    };
    option (google.api.method_signature) = "recognizer,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc DeleteRecognizer(DeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UndeleteRecognizer(UndeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/recognizers/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Performs synchronous Speech recognition: receive results after all audio
  // has been sent and processed.
  rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:recognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,content";
    option (google.api.method_signature) = "recognizer,config,config_mask,uri";
  }

  // Performs bidirectional streaming speech recognition: receive results while
  // sending audio. This method is only available via the gRPC API (not REST).
  rpc StreamingRecognize(stream StreamingRecognizeRequest)
      returns (stream StreamingRecognizeResponse) {}

  // Performs batch asynchronous speech recognition: send a request with N
  // audio files and receive a long running operation that can be polled to see
  // when the transcriptions are finished.
  rpc BatchRecognize(BatchRecognizeRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:batchRecognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,files";
    option (google.longrunning.operation_info) = {
      response_type: "BatchRecognizeResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Returns the requested [Config][google.cloud.speech.v2.Config].
  rpc GetConfig(GetConfigRequest) returns (Config) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/config}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [Config][google.cloud.speech.v2.Config].
  rpc UpdateConfig(UpdateConfigRequest) returns (Config) {
    option (google.api.http) = {
      patch: "/v2/{config.name=projects/*/locations/*/config}"
      body: "config"
    };
    option (google.api.method_signature) = "config,update_mask";
  }

  // Creates a [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc CreateCustomClass(CreateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/customClasses"
      body: "custom_class"
    };
    option (google.api.method_signature) =
        "parent,custom_class,custom_class_id";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists CustomClasses.
  rpc ListCustomClasses(ListCustomClassesRequest)
      returns (ListCustomClassesResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/customClasses"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc GetCustomClass(GetCustomClassRequest) returns (CustomClass) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UpdateCustomClass(UpdateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{custom_class.name=projects/*/locations/*/customClasses/*}"
      body: "custom_class"
    };
    option (google.api.method_signature) = "custom_class,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc DeleteCustomClass(DeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UndeleteCustomClass(UndeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/customClasses/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Creates a [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc CreatePhraseSet(CreatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/phraseSets"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "parent,phrase_set,phrase_set_id";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists PhraseSets.
  rpc ListPhraseSets(ListPhraseSetsRequest) returns (ListPhraseSetsResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/phraseSets"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc GetPhraseSet(GetPhraseSetRequest) returns (PhraseSet) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UpdatePhraseSet(UpdatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "phrase_set,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc DeletePhraseSet(DeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UndeletePhraseSet(UndeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/phraseSets/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }
}

// Request message for the
// [CreateRecognizer][google.cloud.speech.v2.Speech.CreateRecognizer] method.
message CreateRecognizerRequest {
  // Required. The Recognizer to create.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the Recognizer, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the Recognizer, which will become the final component of
  // the Recognizer's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string recognizer_id = 3;

  // Required. The project and location where this Recognizer will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/Recognizer"
    }
  ];
}
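
// Example (illustrative only): a minimal CreateRecognizerRequest in textproto
// form. The parent, recognizer_id, model, and language below are placeholder
// values chosen for this sketch, not defaults of the API.
//
//   parent: "projects/my-project/locations/global"
//   recognizer_id: "my-recognizer"
//   recognizer {
//     model: "latest_long"
//     language_codes: "en-US"
//   }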

// Represents the metadata of a long-running operation.
message OperationMetadata {
  // The time the operation was created.
  google.protobuf.Timestamp create_time = 1;

  // The time the operation was last updated.
  google.protobuf.Timestamp update_time = 2;

  // The resource path for the target of the operation.
  string resource = 3;

  // The method that triggered the operation.
  string method = 4;

  // The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
  // the content of the Operation is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 6 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKey"
  }];

  // The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the content of the Operation is encrypted. The expected format
  // is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 7 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKeyVersion"
  }];

  // The request that spawned the Operation.
  oneof request {
    // The BatchRecognizeRequest that spawned the Operation.
    BatchRecognizeRequest batch_recognize_request = 8;

    // The CreateRecognizerRequest that spawned the Operation.
    CreateRecognizerRequest create_recognizer_request = 9;

    // The UpdateRecognizerRequest that spawned the Operation.
    UpdateRecognizerRequest update_recognizer_request = 10;

    // The DeleteRecognizerRequest that spawned the Operation.
    DeleteRecognizerRequest delete_recognizer_request = 11;

    // The UndeleteRecognizerRequest that spawned the Operation.
    UndeleteRecognizerRequest undelete_recognizer_request = 12;

    // The CreateCustomClassRequest that spawned the Operation.
    CreateCustomClassRequest create_custom_class_request = 13;

    // The UpdateCustomClassRequest that spawned the Operation.
    UpdateCustomClassRequest update_custom_class_request = 14;

    // The DeleteCustomClassRequest that spawned the Operation.
    DeleteCustomClassRequest delete_custom_class_request = 15;

    // The UndeleteCustomClassRequest that spawned the Operation.
    UndeleteCustomClassRequest undelete_custom_class_request = 16;

    // The CreatePhraseSetRequest that spawned the Operation.
    CreatePhraseSetRequest create_phrase_set_request = 17;

    // The UpdatePhraseSetRequest that spawned the Operation.
    UpdatePhraseSetRequest update_phrase_set_request = 18;

    // The DeletePhraseSetRequest that spawned the Operation.
    DeletePhraseSetRequest delete_phrase_set_request = 19;

    // The UndeletePhraseSetRequest that spawned the Operation.
    UndeletePhraseSetRequest undelete_phrase_set_request = 20;

    // The UpdateConfigRequest that spawned the Operation.
    UpdateConfigRequest update_config_request = 21 [deprecated = true];
  }

  // The percent progress of the Operation. Values can range from 0-100. If the
  // value is 100, then the operation is finished.
  int32 progress_percent = 22;

  // Specific metadata per RPC.
  oneof metadata {
    // Metadata specific to the BatchRecognize method.
    BatchRecognizeMetadata batch_recognize_metadata = 23;
  }
}

// Request message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersRequest {
  // Required. The project and location of Recognizers to list. The expected
  // format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of Recognizers to return. The service may return fewer
  // than this value. If unspecified, at most 5 Recognizers will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] must match
  // the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersResponse {
  // The list of requested Recognizers.
  repeated Recognizer recognizers = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListRecognizersRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetRecognizer][google.cloud.speech.v2.Speech.GetRecognizer] method.
message GetRecognizerRequest {
  // Required. The name of the Recognizer to retrieve. The expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];
}

// Request message for the
// [UpdateRecognizer][google.cloud.speech.v2.Speech.UpdateRecognizer] method.
message UpdateRecognizerRequest {
  // Required. The Recognizer to update.
  //
  // The Recognizer's `name` field is used to identify the Recognizer to
  // update. Format:
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields are
  // considered for update. Use `*` to update the entire Recognizer resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated Recognizer, but do
  // not actually update it.
  bool validate_only = 4;
}
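
// Example (illustrative only): an UpdateRecognizerRequest in textproto form
// that changes only the display name. The resource name and display name are
// placeholders.
//
//   recognizer {
//     name: "projects/my-project/locations/global/recognizers/my-recognizer"
//     display_name: "Updated display name"
//   }
//   update_mask { paths: "display_name" }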

// Request message for the
// [DeleteRecognizer][google.cloud.speech.v2.Speech.DeleteRecognizer] method.
message DeleteRecognizerRequest {
  // Required. The name of the Recognizer to delete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the deleted Recognizer, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the Recognizer is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeleteRecognizer][google.cloud.speech.v2.Speech.UndeleteRecognizer]
// method.
message UndeleteRecognizerRequest {
  // Required. The name of the Recognizer to undelete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the undeleted Recognizer, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 4;
}

// A Recognizer message. Stores recognition configuration and metadata.
message Recognizer {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Recognizer"
    pattern: "projects/{project}/locations/{location}/recognizers/{recognizer}"
    style: DECLARATIVE_FRIENDLY
  };

  // Set of states that define the lifecycle of a Recognizer.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The Recognizer is active and ready for use.
    ACTIVE = 2;

    // This Recognizer has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the Recognizer.
  // Format:
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the Recognizer.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // User-settable, human-readable name for the Recognizer. Must be 63
  // characters or less.
  string display_name = 3;

  // Required. Which model to use for recognition requests. Select the model
  // best suited to your domain to get best results.
  //
  // Guidance for choosing which model to use can be found in the
  // [Transcription Models
  // Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
  // and the models supported in each region can be found in the [Table Of
  // Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  string model = 4 [(google.api.field_behavior) = REQUIRED];

  // Required. The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  //
  // Supported languages for each model are listed in the [Table of Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  //
  // If additional languages are provided, the recognition result will contain
  // recognition in the most likely language detected. The recognition result
  // will include the language tag of the language detected in the audio.
  // When you create or update a Recognizer, these values are
  // stored in normalized BCP-47 form. For example, "en-us" is stored as
  // "en-US".
  repeated string language_codes = 17 [(google.api.field_behavior) = REQUIRED];

  // Default configuration to use for requests with this Recognizer.
  // This can be overwritten by inline configuration in the
  // [RecognizeRequest.config][google.cloud.speech.v2.RecognizeRequest.config]
  // field.
  RecognitionConfig default_recognition_config = 6;

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must be 63 characters or less each.
  // At most 100 annotations.
  map<string, string> annotations = 7;

  // Output only. The Recognizer lifecycle state.
  State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this Recognizer was modified.
  google.protobuf.Timestamp update_time = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer was requested for deletion.
  google.protobuf.Timestamp delete_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer will be purged.
  google.protobuf.Timestamp expire_time = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this Recognizer is in the process of being
  // updated.
  bool reconciling = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with
  // which the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 15 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 16 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}
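
// Example (illustrative only): a minimal Recognizer in textproto form, as it
// might be populated inside a CreateRecognizerRequest. The model and language
// are placeholder choices; `auto_decoding_config` asks the service to detect
// the audio format.
//
//   display_name: "My recognizer"
//   model: "latest_long"
//   language_codes: "en-US"
//   default_recognition_config {
//     auto_decoding_config {}
//   }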

// Automatically detected decoding parameters.
// Supported for the following encodings:
//
// * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
//
// * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
//
// * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
//
// * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
//
// * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
//
// * FLAC: FLAC frames in the "native FLAC" container format.
//
// * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
//
// * OGG_OPUS: Opus audio frames in an Ogg container.
//
// * WEBM_OPUS: Opus audio frames in a WebM container.
message AutoDetectDecodingConfig {}

// Explicitly specified decoding parameters.
message ExplicitDecodingConfig {
  // Supported audio data encodings.
  enum AudioEncoding {
    // Default value. This value is unused.
    AUDIO_ENCODING_UNSPECIFIED = 0;

    // Headerless 16-bit signed little-endian PCM samples.
    LINEAR16 = 1;

    // Headerless 8-bit companded mulaw samples.
    MULAW = 2;

    // Headerless 8-bit companded alaw samples.
    ALAW = 3;
  }

  // Required. Encoding of the audio data sent for recognition.
  AudioEncoding encoding = 1 [(google.api.field_behavior) = REQUIRED];

  // Sample rate in Hertz of the audio data sent for recognition. Valid
  // values are: 8000-48000. 16000 is optimal. For best results, set the
  // sampling rate of the audio source to 16000 Hz. If that's not possible, use
  // the native sample rate of the audio source (instead of re-sampling).
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  int32 sample_rate_hertz = 2;

  // Number of channels present in the audio data sent for recognition.
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  //
  // The maximum allowed value is 8.
  int32 audio_channel_count = 3;
}

// Configuration to enable speaker diarization.
message SpeakerDiarizationConfig {
  // Required. Minimum number of speakers in the conversation. This range gives
  // you more flexibility by allowing the system to automatically determine the
  // correct number of speakers.
  //
  // To fix the number of speakers detected in the audio, set
  // `min_speaker_count` = `max_speaker_count`.
  int32 min_speaker_count = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Maximum number of speakers in the conversation. Valid values
  // are: 1-6. Must be >= `min_speaker_count`. This range gives you more
  // flexibility by allowing the system to automatically determine the correct
  // number of speakers.
  int32 max_speaker_count = 3 [(google.api.field_behavior) = REQUIRED];
}
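
// Example (illustrative only): a SpeakerDiarizationConfig in textproto form.
// The range below lets the service detect between two and six speakers; to fix
// the speaker count, set both fields to the same value.
//
//   min_speaker_count: 2
//   max_speaker_count: 6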

// Available recognition features.
message RecognitionFeatures {
  // Options for how to recognize multi-channel audio.
  enum MultiChannelMode {
    // Default value for the multi-channel mode. If the audio contains
    // multiple channels, only the first channel will be transcribed; other
    // channels will be ignored.
    MULTI_CHANNEL_MODE_UNSPECIFIED = 0;

    // If selected, each channel in the provided audio is transcribed
    // independently. This cannot be selected if the selected
    // [model][google.cloud.speech.v2.Recognizer.model] is `latest_short`.
    SEPARATE_RECOGNITION_PER_CHANNEL = 1;
  }

  // If set to `true`, the server will attempt to filter out profanities,
  // replacing all but the initial character in each filtered word with
  // asterisks, for instance, "f***". If set to `false` or omitted, profanities
  // won't be filtered out.
  bool profanity_filter = 1;

  // If `true`, the top result includes a list of words and the start and end
  // time offsets (timestamps) for those words. If `false`, no word-level time
  // offset information is returned. The default is `false`.
  bool enable_word_time_offsets = 2;

  // If `true`, the top result includes a list of words and the confidence for
  // those words. If `false`, no word-level confidence information is returned.
  // The default is `false`.
  bool enable_word_confidence = 3;

  // If `true`, adds punctuation to recognition result hypotheses. This feature
  // is only available in select languages. The default `false` value does not
  // add punctuation to result hypotheses.
  bool enable_automatic_punctuation = 4;

  // The spoken punctuation behavior for the call. If `true`, replaces spoken
  // punctuation with the corresponding symbols in the request. For example,
  // "how are you question mark" becomes "how are you?". See
  // https://cloud.google.com/speech-to-text/docs/spoken-punctuation for
  // support. If `false`, spoken punctuation is not replaced.
  bool enable_spoken_punctuation = 14;

  // The spoken emoji behavior for the call. If `true`, adds spoken emoji
  // formatting for the request. This will replace spoken emojis with the
  // corresponding Unicode symbols in the final transcript. If `false`, spoken
  // emojis are not replaced.
  bool enable_spoken_emojis = 15;

  // Mode for recognizing multi-channel audio.
  MultiChannelMode multi_channel_mode = 17;

  // Configuration to enable speaker diarization and set additional
  // parameters to make diarization better suited for your application.
  // When this is enabled, we send all the words from the beginning of the
  // audio for the top alternative in every consecutive STREAMING response.
  // This is done in order to improve our speaker tags as our models learn to
  // identify the speakers in the conversation over time.
  // For non-streaming requests, the diarization results will be provided only
  // in the top alternative of the FINAL SpeechRecognitionResult.
  SpeakerDiarizationConfig diarization_config = 9;

  // Maximum number of recognition hypotheses to be returned.
  // The server may return fewer than `max_alternatives`.
  // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
  // one. If omitted, will return a maximum of one.
  int32 max_alternatives = 16;
}

// Provides "hints" to the speech recognizer to favor specific words and
// phrases in the results. PhraseSets can be specified as an inline resource,
// or a reference to an existing PhraseSet resource.
message SpeechAdaptation {
  // A biasing PhraseSet, which can be either a string referencing the name of
  // an existing PhraseSets resource, or an inline definition of a PhraseSet.
  message AdaptationPhraseSet {
    oneof value {
      // The name of an existing PhraseSet resource. The user must have read
      // access to the resource and it must not be deleted.
      string phrase_set = 1 [(google.api.resource_reference) = {
        type: "speech.googleapis.com/PhraseSet"
      }];

      // An inline defined PhraseSet.
      PhraseSet inline_phrase_set = 2;
    }
  }

  // A list of inline or referenced PhraseSets.
  repeated AdaptationPhraseSet phrase_sets = 1;

  // A list of inline CustomClasses. Existing CustomClass resources can be
  // referenced directly in a PhraseSet.
  repeated CustomClass custom_classes = 2;
}
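
// Example (illustrative only): a SpeechAdaptation in textproto form that mixes
// a reference to an existing PhraseSet resource with an inline one. The
// resource name and phrase value are placeholders, and the `phrases` field
// shown belongs to the PhraseSet message defined elsewhere in this package.
//
//   phrase_sets {
//     phrase_set: "projects/my-project/locations/global/phraseSets/my-phrases"
//   }
//   phrase_sets {
//     inline_phrase_set {
//       phrases { value: "Speech-to-Text" }
//     }
//   }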

// Provides information to the Recognizer that specifies how to process the
// recognition request.
message RecognitionConfig {
  // Decoding parameters for audio being sent for recognition.
  oneof decoding_config {
    // Automatically detect decoding parameters.
    // Preferred for supported formats.
    AutoDetectDecodingConfig auto_decoding_config = 7;

    // Explicitly specified decoding parameters.
    // Required if using headerless PCM audio (linear16, mulaw, alaw).
    ExplicitDecodingConfig explicit_decoding_config = 8;
  }

  // Speech recognition features to enable.
  RecognitionFeatures features = 2;

  // Speech adaptation context that weights recognizer predictions for specific
  // words and phrases.
  SpeechAdaptation adaptation = 6;
}

// Request message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method. Either
// `content` or `uri` must be supplied. Supplying both or neither returns
// [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See [content
// limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognizeRequest {
  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.RecognizeRequest.config_mask] field
  // can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1;

  // The list of fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] that override the
  // values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.RecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 8;

  // The audio source, which is either inline content or a Google Cloud
  // Storage URI.
  oneof audio_source {
    // The audio data bytes encoded as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. As
    // with all bytes fields, proto buffers use a pure binary representation,
    // whereas JSON representations use base64.
    bytes content = 5;

    // URI that points to a file that contains audio data bytes as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. The file
    // must not be compressed (for example, gzip). Currently, only Google Cloud
    // Storage URIs are supported, which must be specified in the following
    // format: `gs://bucket_name/object_name` (other URI formats return
    // [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more
    // information, see [Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 6;
  }
}
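
// Example (illustrative only): a RecognizeRequest in textproto form that
// supplies explicit decoding parameters for headerless PCM audio read from
// Cloud Storage. No config_mask is set, so all non-default fields of `config`
// override the Recognizer's stored config. All names and values below are
// placeholders.
//
//   recognizer: "projects/my-project/locations/global/recognizers/my-recognizer"
//   config {
//     explicit_decoding_config {
//       encoding: LINEAR16
//       sample_rate_hertz: 16000
//       audio_channel_count: 1
//     }
//     features { enable_word_time_offsets: true }
//   }
//   uri: "gs://my-bucket/my-audio.raw"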

// Metadata about the recognition request and response.
message RecognitionResponseMetadata {
  // When available, billed audio seconds for the corresponding request.
  google.protobuf.Duration total_billed_duration = 6;
}

// Alternative hypotheses (a.k.a. n-best list).
message SpeechRecognitionAlternative {
  // Transcript text representing the words that the user spoke.
  string transcript = 1;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users
  // should not rely on it to be always provided. The default of 0.0 is a
  // sentinel value indicating `confidence` was not set.
  float confidence = 2;

  // A list of word-specific information for each recognized word.
  // When the
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is set, you will see all the words from the beginning of the audio.
  repeated WordInfo words = 3;
}

// Word-specific information for recognized words.
message WordInfo {
  // Time offset relative to the beginning of the audio,
  // and corresponding to the start of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration start_offset = 1;

  // Time offset relative to the beginning of the audio,
  // and corresponding to the end of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration end_offset = 2;

  // The word corresponding to this set of information.
  string word = 3;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users
  // should not rely on it to be always provided. The default of 0.0 is a
  // sentinel value indicating `confidence` was not set.
  float confidence = 4;

  // A distinct label is assigned for every speaker within the audio. This
  // field specifies which one of those speakers was detected to have spoken
  // this word. `speaker_label` is set if
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is given and only in the top alternative.
  string speaker_label = 6;
}
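
// Example (illustrative only): a WordInfo as it might appear in the top
// alternative when word time offsets, word confidence, and diarization are all
// enabled. The values below are made up for illustration.
//
//   start_offset { seconds: 1 nanos: 200000000 }
//   end_offset { seconds: 1 nanos: 900000000 }
//   word: "question"
//   confidence: 0.93
//   speaker_label: "1"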

// A speech recognition result corresponding to a portion of the audio.
message SpeechRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel.
  // For `audio_channel_count` = `N`, its output values can range from `1` to
  // `N`.
  int32 channel_tag = 2;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected as the most likely language spoken in the audio.
  string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Response message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method.
message RecognizeResponse {
  // Sequential list of transcription results corresponding to sequential
  // portions of audio.
  repeated SpeechRecognitionResult results = 3;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 2;
}

// Available recognition features specific to streaming recognition requests.
message StreamingRecognitionFeatures {
  // Events that a timeout can be set on for voice activity.
  message VoiceActivityTimeout {
    // Duration to timeout the stream if no speech begins. If this is set and
    // no speech is detected in this duration at the start of the stream, the
    // server will close the stream.
    google.protobuf.Duration speech_start_timeout = 1;

    // Duration to timeout the stream after speech ends. If this is set and no
    // speech is detected in this duration after speech was detected, the
    // server will close the stream.
    google.protobuf.Duration speech_end_timeout = 2;
  }

  // If `true`, responses with voice activity speech events will be returned as
  // they are detected.
  bool enable_voice_activity_events = 1;

  // Whether or not to stream interim results to the client. If set to true,
  // interim results will be streamed to the client. Otherwise, only the final
  // response will be streamed back.
  bool interim_results = 2;

  // If set, the server will automatically close the stream after the specified
  // duration has elapsed after the last VOICE_ACTIVITY speech event has been
  // sent. The field `enable_voice_activity_events` must also be set to true.
  VoiceActivityTimeout voice_activity_timeout = 3;
}
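
// Example (illustrative only): StreamingRecognitionFeatures in textproto form
// that enables interim results and voice activity events, and closes the
// stream if no speech starts within five seconds or speech ends for two
// seconds. The timeout values are placeholders.
//
//   interim_results: true
//   enable_voice_activity_events: true
//   voice_activity_timeout {
//     speech_start_timeout { seconds: 5 }
//     speech_end_timeout { seconds: 2 }
//   }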

// Provides configuration information for the StreamingRecognize request.
message StreamingRecognitionConfig {
  // Required. Features and audio metadata to use for the Automatic Speech
  // Recognition. This field in combination with the
  // [config_mask][google.cloud.speech.v2.StreamingRecognitionConfig.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] that
  // override the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] override
  // the values in the Recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // Recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config]
  // completely overrides and replaces the config in the recognizer for this
  // recognition request.
  google.protobuf.FieldMask config_mask = 3;

  // Speech recognition features to enable specific to streaming audio
  // recognition requests.
  StreamingRecognitionFeatures streaming_features = 2;
}

// Request message for the
// [StreamingRecognize][google.cloud.speech.v2.Speech.StreamingRecognize]
// method. Multiple
// [StreamingRecognizeRequest][google.cloud.speech.v2.StreamingRecognizeRequest]
// messages are sent. The first message must contain a
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer] and
// optionally a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message and must not contain
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio]. All
// subsequent messages must contain
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] and must not
// contain a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message.
message StreamingRecognizeRequest {
  // Required. Streaming recognition should start with an initial request
  // having a `recognizer`. Subsequent requests carry the audio data to be
  // recognized.
  //
  // The initial request with configuration can be omitted if the Recognizer
  // being used has a
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config].
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  oneof streaming_request {
    // StreamingRecognitionConfig to be used in this recognition attempt.
    // If provided, it will override the default RecognitionConfig stored in
    // the Recognizer.
    StreamingRecognitionConfig streaming_config = 6;

    // Inline audio bytes to be Recognized.
    // Maximum size for this field is 15 KB per request.
    bytes audio = 5;
  }
}
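
// Example (illustrative only): the shape of the messages in a
// StreamingRecognize call, in textproto form. The resource name is a
// placeholder, and `<raw audio bytes>` stands in for binary data.
//
//   // First request: recognizer plus optional streaming_config, no audio.
//   recognizer: "projects/my-project/locations/global/recognizers/my-recognizer"
//   streaming_config {
//     config { auto_decoding_config {} }
//     streaming_features { interim_results: true }
//   }
//
//   // Each subsequent request: audio chunks only (at most 15 KB each).
//   audio: "<raw audio bytes>"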

// Request message for the
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
// method.
message BatchRecognizeRequest {
  // Possible processing strategies for batch requests.
  enum ProcessingStrategy {
    // Default value for the processing strategy. The request is processed as
    // soon as it's received.
    PROCESSING_STRATEGY_UNSPECIFIED = 0;

    // If selected, processes the request during lower utilization periods for
    // a price discount. The request is fulfilled within 24 hours.
    DYNAMIC_BATCHING = 1;
  }

  // Required. Resource name of the recognizer to be used for ASR.
  string recognizer = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.BatchRecognizeRequest.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 4;

  // The list of fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] that override
  // the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all given fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 5;

  // Audio files with file metadata for ASR.
  // The maximum number of files allowed to be specified is 5.
  repeated BatchRecognizeFileMetadata files = 3;

  // Configuration options for where to output the transcripts of each file.
  RecognitionOutputConfig recognition_output_config = 6;

  // Processing strategy to use for this request.
  ProcessingStrategy processing_strategy = 7;
}

// Output configurations for Cloud Storage.
message GcsOutputConfig {
  // The Cloud Storage URI prefix with which recognition results will be
  // written.
  string uri = 1;
}

// Output configurations for inline response.
message InlineOutputConfig {}

// Configuration options for the output(s) of recognition.
message RecognitionOutputConfig {
  oneof output {
    // If this message is populated, recognition results are written to the
    // provided Google Cloud Storage URI.
    GcsOutputConfig gcs_output_config = 1;

    // If this message is populated, recognition results are provided in the
    // [BatchRecognizeResponse][google.cloud.speech.v2.BatchRecognizeResponse]
    // message of the Operation when completed. This is only supported when
    // calling [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
    // with just one audio file.
    InlineOutputConfig inline_response_config = 2;
  }
}

// Response message for
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize] that is
// packaged into a longrunning [Operation][google.longrunning.Operation].
message BatchRecognizeResponse {
  // Map from filename to the final result for that file.
  map<string, BatchRecognizeFileResult> results = 1;

  // When available, billed audio seconds for the corresponding request.
  google.protobuf.Duration total_billed_duration = 2;
}
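
// Example (illustrative only): a BatchRecognizeRequest in textproto form that
// transcribes two Cloud Storage files and writes the transcripts back under a
// Cloud Storage prefix, opting into dynamic batching. All URIs and resource
// names are placeholders.
//
//   recognizer: "projects/my-project/locations/global/recognizers/my-recognizer"
//   files { uri: "gs://my-bucket/interview-1.wav" }
//   files { uri: "gs://my-bucket/interview-2.wav" }
//   recognition_output_config {
//     gcs_output_config { uri: "gs://my-bucket/transcripts/" }
//   }
//   processing_strategy: DYNAMIC_BATCHING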

// Output type for Cloud Storage of BatchRecognize transcripts. Though this
// proto isn't returned in this API anywhere, the Cloud Storage transcripts
// will be this proto serialized and should be parsed as such.
message BatchRecognizeResults {
  // Sequential list of transcription results corresponding to sequential
  // portions of audio.
  repeated SpeechRecognitionResult results = 1;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 2;
}

// Final results for a single file.
message BatchRecognizeFileResult {
  // The Cloud Storage URI to which recognition results were written.
  string uri = 1;

  // Error if one was encountered.
  google.rpc.Status error = 2;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 3;

  // The transcript for the audio file. This is populated only when
  // [InlineOutputConfig][google.cloud.speech.v2.InlineOutputConfig] is set in
  // the
  // [RecognitionOutputConfig][google.cloud.speech.v2.RecognitionOutputConfig].
  BatchRecognizeResults transcript = 4;
}

// Metadata about transcription for a single file (for example, progress
// percent).
message BatchRecognizeTranscriptionMetadata {
  // How much of the file has been transcribed so far.
  int32 progress_percent = 1;

  // Error if one was encountered.
  google.rpc.Status error = 2;

  // The Cloud Storage URI to which recognition results will be written.
  string uri = 3;
}

// Operation metadata for
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize].
message BatchRecognizeMetadata {
  // Map from provided filename to the transcription metadata for that file.
  map<string, BatchRecognizeTranscriptionMetadata> transcription_metadata = 1;
}

// Metadata about a single file in a batch for BatchRecognize.
message BatchRecognizeFileMetadata {
  // The audio source, which is a Google Cloud Storage URI.
  oneof audio_source {
    // Cloud Storage URI for the audio file.
    string uri = 1;
  }

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.BatchRecognizeFileMetadata.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource as well as the
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] at the
  // request level.
  RecognitionConfig config = 4;

  // The list of fields in
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] that
  // override the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] override
  // the values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config]
  // completely overrides and replaces the config in the recognizer for this
  // recognition request.
  google.protobuf.FieldMask config_mask = 5;
}

// A streaming speech recognition result corresponding to a portion of the
// audio that is currently being processed.
message StreamingRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // If `false`, this
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult]
  // represents an interim result that may change. If `true`, this is the final
  // time the speech service will return this particular
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult];
  // the recognizer will not return any further hypotheses for this portion of
  // the transcript and corresponding audio.
  bool is_final = 2;

  // An estimate of the likelihood that the recognizer will not change its
  // guess about this interim result. Values range from 0.0 (completely
  // unstable) to 1.0 (completely stable). This field is only provided for
  // interim results
  // ([is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`).
  // The default of 0.0 is a sentinel value indicating `stability` was not set.
  float stability = 3;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel.
  // For `audio_channel_count` = `N`, its output values can range from `1` to
  // `N`.
  int32 channel_tag = 5;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected as the most likely language spoken in the audio.
  string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
// `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
// messages are streamed back to the client. If there is no recognizable
// audio then no messages are streamed back to the client.
//
// Here are some examples of `StreamingRecognizeResponse`s that might
// be returned while processing audio:
//
// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
//
// 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
//
// 3. results { alternatives { transcript: "to be" } stability: 0.9 }
//    results { alternatives { transcript: " or not to be" } stability: 0.01 }
//
// 4. results { alternatives { transcript: "to be or not to be"
//                             confidence: 0.92 }
//              alternatives { transcript: "to bee or not to bee" }
//    is_final: true }
//
// 5. results { alternatives { transcript: " that's" } stability: 0.01 }
//
// 6. results { alternatives { transcript: " that is" } stability: 0.9 }
//    results { alternatives { transcript: " the question" } stability: 0.01 }
//
//                             confidence: 0.98 }
//              alternatives { transcript: " that was the question" }
//    is_final: true }
//
// Notes:
//
// - Only two of the above responses, #4 and #7, contain final results; they
//   are indicated by `is_final: true`. Concatenating these together
//   generates the full transcript: "to be or not to be that is the question".
//
// - The others contain interim `results`. #3 and #6 contain two interim
//   `results`: the first portion has a high stability and is less likely to
//   change; the second portion has a low stability and is very likely to
//   change. A UI designer might choose to show only high-stability
//   `results`.
//
// - The specific `stability` and `confidence` values shown above are only
//   for illustrative purposes. Actual values may vary.
//
// - In each response, only one of these fields will be set: `error`,
//   `speech_event_type`, or one or more (repeated) `results`.
message StreamingRecognizeResponse {
  // Indicates the type of speech event.
  enum SpeechEventType {
    // No speech event specified.
    SPEECH_EVENT_TYPE_UNSPECIFIED = 0;

    // This event indicates that the server has detected the end of the
    // user's speech utterance and expects no additional speech. Therefore,
    // the server will not process additional audio and will close the gRPC
    // bidirectional stream. This event is only sent if there was a force
    // cutoff due to silence being detected early. This event is only
    // available through the `latest_short`
    // [model][google.cloud.speech.v2.Recognizer.model].
    END_OF_SINGLE_UTTERANCE = 1;

    // This event indicates that the server has detected the beginning of
    // human voice activity in the stream. This event can be returned
    // multiple times if speech starts and stops repeatedly throughout the
    // stream. This event is only sent if `voice_activity_events` is set to
    // true.
    SPEECH_ACTIVITY_BEGIN = 2;

    // This event indicates that the server has detected the end of human
    // voice activity in the stream. This event can be returned multiple
    // times if speech starts and stops repeatedly throughout the stream.
    // This event is only sent if `voice_activity_events` is set to true.
    SPEECH_ACTIVITY_END = 3;
  }

  // This repeated list contains zero or more results that correspond to
  // consecutive portions of the audio currently being processed. It contains
  // zero or one
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`true`
  // result (the newly settled portion), followed by zero or more
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`
  // results (the interim results).
  repeated StreamingRecognitionResult results = 6;

  // Indicates the type of speech event.
  SpeechEventType speech_event_type = 3;

  // Time offset between the beginning of the audio and event emission.
  google.protobuf.Duration speech_event_offset = 7;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 5;
}

// Message representing the config for the Speech-to-Text API. This includes
// an optional [KMS
// key](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
// incoming data will be encrypted.
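//
// For illustration only, a populated `Config` resource in text format might
// look like the following (all values are placeholders):
//
//   name: "projects/my-project/locations/global/config"
//   kms_key_name: "projects/my-project/locations/global/keyRings/my-ring/cryptoKeys/my-key"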
message Config {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Config"
    pattern: "projects/{project}/locations/{location}/config"
  };

  // Output only. The name of the config resource. There is exactly one
  // config resource per project per location. The expected format is
  // `projects/{project}/locations/{location}/config`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. An optional [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) that,
  // if present, will be used to encrypt Speech-to-Text resources at rest.
  // Updating this key will not re-encrypt existing resources; only new
  // resources will be encrypted using this key. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for the
// [GetConfig][google.cloud.speech.v2.Speech.GetConfig] method.
message GetConfigRequest {
  // Required. The name of the config to retrieve. There is exactly one
  // config resource per project per location. The expected format is
  // `projects/{project}/locations/{location}/config`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "speech.googleapis.com/Config" }
  ];
}

// Request message for the
// [UpdateConfig][google.cloud.speech.v2.Speech.UpdateConfig] method.
message UpdateConfigRequest {
  // Required. The config to update.
  //
  // The config's `name` field is used to identify the config to be updated.
  // The expected format is `projects/{project}/locations/{location}/config`.
  Config config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated.
  google.protobuf.FieldMask update_mask = 2;
}

// CustomClass for biasing in speech recognition. Used to define a set of
// words or phrases that represents a common concept or theme likely to
// appear in your audio, for example a list of passenger ship names.
message CustomClass {
  option (google.api.resource) = {
    type: "speech.googleapis.com/CustomClass"
    pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
    style: DECLARATIVE_FRIENDLY
  };

  // An item of the class.
  message ClassItem {
    // The class item's value.
    string value = 1;
  }

  // Set of states that define the lifecycle of a CustomClass.
  enum State {
    // Unspecified state. This is only used/useful for distinguishing unset
    // values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This CustomClass has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the CustomClass. Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the CustomClass.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // User-settable, human-readable name for the CustomClass. Must be 63
  // characters or less.
  string display_name = 4;

  // A collection of class items.
  repeated ClassItem items = 5;

  // Output only. The CustomClass lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Allows users to store small amounts of arbitrary data. Both the key and
  // the value must be 63 characters or less each. At most 100 annotations.
  map<string, string> annotations = 10;

  // Output only. This checksum is computed by the server based on the value
  // of other fields. This may be sent on update, undelete, and delete
  // requests to ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this CustomClass is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with
  // which the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}

// PhraseSet for biasing in speech recognition. A PhraseSet is used to
// provide "hints" to the speech recognizer to favor specific words and
// phrases in the results.
message PhraseSet {
  option (google.api.resource) = {
    type: "speech.googleapis.com/PhraseSet"
    pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
    style: DECLARATIVE_FRIENDLY
  };

  // A Phrase contains words and phrase "hints" so that the speech
  // recognition is more likely to recognize them. This can be used to
  // improve the accuracy for specific words and phrases, for example, if
  // specific commands are typically spoken by the user.
  // This can also be used to add additional words to the vocabulary of the
  // recognizer.
  //
  // List items can also include CustomClass references containing groups of
  // words that represent common concepts that occur in natural language.
  message Phrase {
    // The phrase itself.
    string value = 1;

    // Hint Boost. Overrides the boost set at the phrase set level. A
    // positive value will increase the probability that a specific phrase
    // will be recognized over other similar sounding phrases. The higher the
    // boost, the higher the chance of false positive recognition as well.
    // Negative boost values would correspond to anti-biasing; anti-biasing
    // is not enabled, so negative boost values will return an error. Boost
    // values must be between 0 and 20. Any values outside that range will
    // return an error. We recommend using a binary search approach to
    // finding the optimal value for your use case as well as adding phrases
    // both with and without boost to your requests.
    float boost = 2;
  }

  // Set of states that define the lifecycle of a PhraseSet.
  enum State {
    // Unspecified state. This is only used/useful for distinguishing unset
    // values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This PhraseSet has been deleted.
    DELETED = 4;
  }

  // Output only. The resource name of the PhraseSet. Format:
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System-assigned unique identifier for the PhraseSet.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // A list of words and phrases.
  repeated Phrase phrases = 3;

  // Hint Boost. A positive value will increase the probability that a
  // specific phrase will be recognized over other similar sounding phrases.
  // The higher the boost, the higher the chance of false positive
  // recognition as well. Valid `boost` values are between 0 (exclusive) and
  // 20. We recommend using a binary search approach to finding the optimal
  // value for your use case as well as adding phrases both with and without
  // boost to your requests.
  float boost = 4;

  // User-settable, human-readable name for the PhraseSet. Must be 63
  // characters or less.
  string display_name = 5;

  // Output only. The PhraseSet lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Allows users to store small amounts of arbitrary data. Both the key and
  // the value must be 63 characters or less each. At most 100 annotations.
  map<string, string> annotations = 10;

  // Output only.
  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this PhraseSet is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with
  // which the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}

// Request message for the
// [CreateCustomClass][google.cloud.speech.v2.Speech.CreateCustomClass]
// method.
message CreateCustomClassRequest {
  // Required. The CustomClass to create.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the CustomClass, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the CustomClass, which will become the final component
  // of the CustomClass's resource name.
  //
  // This value should be 4-63 characters, and valid characters are
  // /[a-z][0-9]-/.
  string custom_class_id = 3;

  // Required. The project and location where this CustomClass will be
  // created. The expected format is
  // `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/CustomClass"
    }
  ];
}

// Request message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
// method.
message ListCustomClassesRequest {
  // Required. The project and location of CustomClass resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Number of results per request. A valid page_size ranges from 0 to 100,
  // inclusive. If the page_size is zero or unspecified, a page size of 5
  // will be chosen. If the page size exceeds 100, it will be coerced down to
  // 100. Note that a call might return fewer results than the requested page
  // size.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
  // call. Provide this to retrieve the subsequent page.
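  //
  // A minimal illustrative follow-up request in text format (all values are
  // placeholders):
  //
  //   parent: "projects/my-project/locations/global"
  //   page_size: 10
  //   page_token: "<next_page_token from the previous response>"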
  //
  // When paginating, all other parameters provided to
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
  // must match the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
// method.
message ListCustomClassesResponse {
  // The list of requested CustomClasses.
  repeated CustomClass custom_classes = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListCustomClassesRequest.page_token]
  // to retrieve the next page. If this field is omitted, there are no
  // subsequent pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetCustomClass][google.cloud.speech.v2.Speech.GetCustomClass] method.
message GetCustomClassRequest {
  // Required. The name of the CustomClass to retrieve. The expected format
  // is
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];
}

// Request message for the
// [UpdateCustomClass][google.cloud.speech.v2.Speech.UpdateCustomClass]
// method.
message UpdateCustomClassRequest {
  // Required. The CustomClass to update.
  //
  // The CustomClass's `name` field is used to identify the CustomClass to
  // update. Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated. If empty, all fields are considered
  // for update.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated CustomClass, but do
  // not actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeleteCustomClass][google.cloud.speech.v2.Speech.DeleteCustomClass]
// method.
message DeleteCustomClassRequest {
  // Required. The name of the CustomClass to delete. Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the deleted CustomClass, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the CustomClass is not found, the request will
  // succeed and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeleteCustomClass][google.cloud.speech.v2.Speech.UndeleteCustomClass]
// method.
message UndeleteCustomClassRequest {
  // Required. The name of the CustomClass to undelete.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the undeleted CustomClass, but
  // do not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 4;
}

// Request message for the
// [CreatePhraseSet][google.cloud.speech.v2.Speech.CreatePhraseSet] method.
message CreatePhraseSetRequest {
  // Required. The PhraseSet to create.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the PhraseSet, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the PhraseSet, which will become the final component
  // of the PhraseSet's resource name.
  //
  // This value should be 4-63 characters, and valid characters are
  // /[a-z][0-9]-/.
  string phrase_set_id = 3;

  // Required. The project and location where this PhraseSet will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/PhraseSet"
    }
  ];
}

// Request message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsRequest {
  // Required. The project and location of PhraseSet resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of PhraseSets to return. The service may return fewer
  // than this value. If unspecified, at most 5 PhraseSets will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] must
  // match the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsResponse {
  // The list of requested PhraseSets.
  repeated PhraseSet phrase_sets = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListPhraseSetsRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no
  // subsequent pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetPhraseSet][google.cloud.speech.v2.Speech.GetPhraseSet] method.
message GetPhraseSetRequest {
  // Required. The name of the PhraseSet to retrieve. The expected format is
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];
}

// Request message for the
// [UpdatePhraseSet][google.cloud.speech.v2.Speech.UpdatePhraseSet] method.
message UpdatePhraseSetRequest {
  // Required. The PhraseSet to update.
  //
  // The PhraseSet's `name` field is used to identify the PhraseSet to
  // update. Format:
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields
  // are considered for update. Use `*` to update the entire PhraseSet
  // resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated PhraseSet, but do
  // not actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeletePhraseSet][google.cloud.speech.v2.Speech.DeletePhraseSet] method.
message DeletePhraseSetRequest {
  // Required. The name of the PhraseSet to delete. Format:
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the deleted PhraseSet, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the PhraseSet is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeletePhraseSet][google.cloud.speech.v2.Speech.UndeletePhraseSet]
// method.
message UndeletePhraseSetRequest {
  // Required. The name of the PhraseSet to undelete. Format:
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the undeleted PhraseSet, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 4;
}