// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.speech.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option go_package = "cloud.google.com/go/speech/apiv2/speechpb;speechpb";
option java_multiple_files = true;
option java_outer_classname = "CloudSpeechProto";
option java_package = "com.google.cloud.speech.v2";
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKey"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}"
};
option (google.api.resource_definition) = {
  type: "cloudkms.googleapis.com/CryptoKeyVersion"
  pattern: "projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}"
};

// Enables speech transcription and resource management.
service Speech {
  option (google.api.default_host) = "speech.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc CreateRecognizer(CreateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/recognizers"
      body: "recognizer"
    };
    option (google.api.method_signature) = "parent,recognizer,recognizer_id";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists Recognizers.
  rpc ListRecognizers(ListRecognizersRequest)
      returns (ListRecognizersResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/recognizers"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [Recognizer][google.cloud.speech.v2.Recognizer]. Fails with
  // [NOT_FOUND][google.rpc.Code.NOT_FOUND] if the requested Recognizer doesn't
  // exist.
  rpc GetRecognizer(GetRecognizerRequest) returns (Recognizer) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
  }
  // Updates the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UpdateRecognizer(UpdateRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{recognizer.name=projects/*/locations/*/recognizers/*}"
      body: "recognizer"
    };
    option (google.api.method_signature) = "recognizer,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc DeleteRecognizer(DeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/recognizers/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [Recognizer][google.cloud.speech.v2.Recognizer].
  rpc UndeleteRecognizer(UndeleteRecognizerRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/recognizers/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "Recognizer"
      metadata_type: "OperationMetadata"
    };
  }

  // Performs synchronous Speech recognition: receive results after all audio
  // has been sent and processed.
  rpc Recognize(RecognizeRequest) returns (RecognizeResponse) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:recognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,content";
    option (google.api.method_signature) = "recognizer,config,config_mask,uri";
  }

  // Performs bidirectional streaming speech recognition: receive results while
  // sending audio. This method is only available via the gRPC API (not REST).
  rpc StreamingRecognize(stream StreamingRecognizeRequest)
      returns (stream StreamingRecognizeResponse) {}

  // Performs batch asynchronous speech recognition: send a request with N
  // audio files and receive a long running operation that can be polled to see
  // when the transcriptions are finished.
  rpc BatchRecognize(BatchRecognizeRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{recognizer=projects/*/locations/*/recognizers/*}:batchRecognize"
      body: "*"
    };
    option (google.api.method_signature) =
        "recognizer,config,config_mask,files";
    option (google.longrunning.operation_info) = {
      response_type: "BatchRecognizeResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Returns the requested [Config][google.cloud.speech.v2.Config].
  rpc GetConfig(GetConfigRequest) returns (Config) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/config}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [Config][google.cloud.speech.v2.Config].
  rpc UpdateConfig(UpdateConfigRequest) returns (Config) {
    option (google.api.http) = {
      patch: "/v2/{config.name=projects/*/locations/*/config}"
      body: "config"
    };
    option (google.api.method_signature) = "config,update_mask";
  }
  // Creates a [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc CreateCustomClass(CreateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/customClasses"
      body: "custom_class"
    };
    option (google.api.method_signature) =
        "parent,custom_class,custom_class_id";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists CustomClasses.
  rpc ListCustomClasses(ListCustomClassesRequest)
      returns (ListCustomClassesResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/customClasses"
    };
    option (google.api.method_signature) = "parent";
  }

  // Returns the requested
  // [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc GetCustomClass(GetCustomClassRequest) returns (CustomClass) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UpdateCustomClass(UpdateCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{custom_class.name=projects/*/locations/*/customClasses/*}"
      body: "custom_class"
    };
    option (google.api.method_signature) = "custom_class,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc DeleteCustomClass(DeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/customClasses/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [CustomClass][google.cloud.speech.v2.CustomClass].
  rpc UndeleteCustomClass(UndeleteCustomClassRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/customClasses/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "CustomClass"
      metadata_type: "OperationMetadata"
    };
  }

  // Creates a [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc CreatePhraseSet(CreatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{parent=projects/*/locations/*}/phraseSets"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "parent,phrase_set,phrase_set_id";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Lists PhraseSets.
  rpc ListPhraseSets(ListPhraseSetsRequest) returns (ListPhraseSetsResponse) {
    option (google.api.http) = {
      get: "/v2/{parent=projects/*/locations/*}/phraseSets"
    };
    option (google.api.method_signature) = "parent";
  }
  // Returns the requested
  // [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc GetPhraseSet(GetPhraseSetRequest) returns (PhraseSet) {
    option (google.api.http) = {
      get: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Updates the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UpdatePhraseSet(UpdatePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v2/{phrase_set.name=projects/*/locations/*/phraseSets/*}"
      body: "phrase_set"
    };
    option (google.api.method_signature) = "phrase_set,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Deletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc DeletePhraseSet(DeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v2/{name=projects/*/locations/*/phraseSets/*}"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }

  // Undeletes the [PhraseSet][google.cloud.speech.v2.PhraseSet].
  rpc UndeletePhraseSet(UndeletePhraseSetRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v2/{name=projects/*/locations/*/phraseSets/*}:undelete"
      body: "*"
    };
    option (google.api.method_signature) = "name";
    option (google.longrunning.operation_info) = {
      response_type: "PhraseSet"
      metadata_type: "OperationMetadata"
    };
  }
}

// Request message for the
// [CreateRecognizer][google.cloud.speech.v2.Speech.CreateRecognizer] method.
message CreateRecognizerRequest {
  // Required. The Recognizer to create.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the Recognizer, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the Recognizer, which will become the final component of
  // the Recognizer's resource name.
  //
  // This value should be 4-63 characters, and valid characters
  // are /[a-z][0-9]-/.
  string recognizer_id = 3;

  // Required. The project and location where this Recognizer will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/Recognizer"
    }
  ];
}
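
// For orientation, a minimal `CreateRecognizerRequest` sketched in textproto
// form (illustrative only; the project, location, and IDs below are
// placeholders, not values defined by this API):
//
//   parent: "projects/my-project/locations/global"
//   recognizer_id: "my-recognizer"
//   recognizer {
//     default_recognition_config {
//       auto_decoding_config {}
//       language_codes: "en-US"
//       model: "long"
//     }
//   }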

// Represents the metadata of a long-running operation.
message OperationMetadata {
  // The time the operation was created.
  google.protobuf.Timestamp create_time = 1;

  // The time the operation was last updated.
  google.protobuf.Timestamp update_time = 2;

  // The resource path for the target of the operation.
  string resource = 3;

  // The method that triggered the operation.
  string method = 4;

  // The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with
  // which the content of the Operation is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 6 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKey"
  }];

  // The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the content of the Operation is encrypted. The expected format
  // is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 7 [(google.api.resource_reference) = {
    type: "cloudkms.googleapis.com/CryptoKeyVersion"
  }];

  // The request that spawned the Operation.
  oneof request {
    // The BatchRecognizeRequest that spawned the Operation.
    BatchRecognizeRequest batch_recognize_request = 8;

    // The CreateRecognizerRequest that spawned the Operation.
    CreateRecognizerRequest create_recognizer_request = 9;

    // The UpdateRecognizerRequest that spawned the Operation.
    UpdateRecognizerRequest update_recognizer_request = 10;

    // The DeleteRecognizerRequest that spawned the Operation.
    DeleteRecognizerRequest delete_recognizer_request = 11;

    // The UndeleteRecognizerRequest that spawned the Operation.
    UndeleteRecognizerRequest undelete_recognizer_request = 12;

    // The CreateCustomClassRequest that spawned the Operation.
    CreateCustomClassRequest create_custom_class_request = 13;

    // The UpdateCustomClassRequest that spawned the Operation.
    UpdateCustomClassRequest update_custom_class_request = 14;

    // The DeleteCustomClassRequest that spawned the Operation.
    DeleteCustomClassRequest delete_custom_class_request = 15;

    // The UndeleteCustomClassRequest that spawned the Operation.
    UndeleteCustomClassRequest undelete_custom_class_request = 16;

    // The CreatePhraseSetRequest that spawned the Operation.
    CreatePhraseSetRequest create_phrase_set_request = 17;

    // The UpdatePhraseSetRequest that spawned the Operation.
    UpdatePhraseSetRequest update_phrase_set_request = 18;

    // The DeletePhraseSetRequest that spawned the Operation.
    DeletePhraseSetRequest delete_phrase_set_request = 19;

    // The UndeletePhraseSetRequest that spawned the Operation.
    UndeletePhraseSetRequest undelete_phrase_set_request = 20;

    // The UpdateConfigRequest that spawned the Operation.
    UpdateConfigRequest update_config_request = 21 [deprecated = true];
  }

  // The percent progress of the Operation. Values can range from 0-100. If the
  // value is 100, then the operation is finished.
  int32 progress_percent = 22;

  // Specific metadata per RPC.
  oneof metadata {
    // Metadata specific to the BatchRecognize method.
    BatchRecognizeMetadata batch_recognize_metadata = 23;
  }
}

// Request message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersRequest {
  // Required. The project and location of Recognizers to list. The expected
  // format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of Recognizers to return. The service may return fewer
  // than this value. If unspecified, at most 5 Recognizers will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] must
  // match the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListRecognizers][google.cloud.speech.v2.Speech.ListRecognizers] method.
message ListRecognizersResponse {
  // The list of requested Recognizers.
  repeated Recognizer recognizers = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListRecognizersRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no subsequent
  // pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetRecognizer][google.cloud.speech.v2.Speech.GetRecognizer] method.
message GetRecognizerRequest {
  // Required. The name of the Recognizer to retrieve. The expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];
}

// Request message for the
// [UpdateRecognizer][google.cloud.speech.v2.Speech.UpdateRecognizer] method.
message UpdateRecognizerRequest {
  // Required. The Recognizer to update.
  //
  // The Recognizer's `name` field is used to identify the Recognizer to
  // update. Format:
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  Recognizer recognizer = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields are
  // considered for update. Use `*` to update the entire Recognizer resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated Recognizer, but do
  // not actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeleteRecognizer][google.cloud.speech.v2.Speech.DeleteRecognizer] method.
message DeleteRecognizerRequest {
  // Required. The name of the Recognizer to delete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the deleted Recognizer, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the Recognizer is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeleteRecognizer][google.cloud.speech.v2.Speech.UndeleteRecognizer]
// method.
message UndeleteRecognizerRequest {
  // Required. The name of the Recognizer to undelete.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // If set, validate the request and preview the undeleted Recognizer, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 4;
}

// A Recognizer message. Stores recognition configuration and metadata.
message Recognizer {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Recognizer"
    pattern: "projects/{project}/locations/{location}/recognizers/{recognizer}"
    style: DECLARATIVE_FRIENDLY
  };

  // Set of states that define the lifecycle of a Recognizer.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // The Recognizer is active and ready for use.
    ACTIVE = 2;

    // This Recognizer has been deleted.
    DELETED = 4;
  }

  // Output only. Identifier. The resource name of the Recognizer.
  // Format: `projects/{project}/locations/{location}/recognizers/{recognizer}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IDENTIFIER
  ];

  // Output only. System-assigned unique identifier for the Recognizer.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // User-settable, human-readable name for the Recognizer. Must be 63
  // characters or less.
  string display_name = 3;

  // Optional. This field is now deprecated. Prefer the
  // [`model`][google.cloud.speech.v2.RecognitionConfig.model] field in the
  // [`RecognitionConfig`][google.cloud.speech.v2.RecognitionConfig] message.
  //
  // Which model to use for recognition requests. Select the model best suited
  // to your domain to get best results.
  //
  // Guidance for choosing which model to use can be found in the
  // [Transcription Models
  // Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
  // and the models supported in each region can be found in the [Table Of
  // Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  string model = 4 [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Optional. This field is now deprecated. Prefer the
  // [`language_codes`][google.cloud.speech.v2.RecognitionConfig.language_codes]
  // field in the
  // [`RecognitionConfig`][google.cloud.speech.v2.RecognitionConfig] message.
  //
  // The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  //
  // Supported languages for each model are listed in the [Table of Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  //
  // If additional languages are provided, recognition result will contain
  // recognition in the most likely language detected. The recognition result
  // will include the language tag of the language detected in the audio.
  // When you create or update a Recognizer, these values are
  // stored in normalized BCP-47 form. For example, "en-us" is stored as
  // "en-US".
  repeated string language_codes = 17
      [deprecated = true, (google.api.field_behavior) = OPTIONAL];

  // Default configuration to use for requests with this Recognizer.
  // This can be overwritten by inline configuration in the
  // [RecognizeRequest.config][google.cloud.speech.v2.RecognizeRequest.config]
  // field.
  RecognitionConfig default_recognition_config = 6;

  // Allows users to store small amounts of arbitrary data.
  // Both the key and the value must be 63 characters or less each.
  // At most 100 annotations.
  map<string, string> annotations = 7;

  // Output only. The Recognizer lifecycle state.
  State state = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this Recognizer was modified.
  google.protobuf.Timestamp update_time = 10
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer was requested for deletion.
  google.protobuf.Timestamp delete_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this Recognizer will be purged.
  google.protobuf.Timestamp expire_time = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This checksum is computed by the server based on the value of
  // other fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this Recognizer is in the process of being
  // updated.
  bool reconciling = 13 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with
  // which the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 15 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the Recognizer is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 16 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}
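
// Illustrative sketch only: how a Recognizer resource might look after
// creation, with the server-normalized language code ("en-us" is stored as
// "en-US"). All names and values are placeholders.
//
//   name: "projects/my-project/locations/global/recognizers/my-recognizer"
//   display_name: "Support call transcriber"
//   default_recognition_config {
//     auto_decoding_config {}
//     language_codes: "en-US"
//     model: "long"
//   }
//   annotations { key: "team" value: "support" }
//   state: ACTIVE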

// Automatically detected decoding parameters.
// Supported for the following encodings:
//
// * WAV_LINEAR16: 16-bit signed little-endian PCM samples in a WAV container.
//
// * WAV_MULAW: 8-bit companded mulaw samples in a WAV container.
//
// * WAV_ALAW: 8-bit companded alaw samples in a WAV container.
//
// * RFC4867_5_AMR: AMR frames with an rfc4867.5 header.
//
// * RFC4867_5_AMRWB: AMR-WB frames with an rfc4867.5 header.
//
// * FLAC: FLAC frames in the "native FLAC" container format.
//
// * MP3: MPEG audio frames with optional (ignored) ID3 metadata.
//
// * OGG_OPUS: Opus audio frames in an Ogg container.
//
// * WEBM_OPUS: Opus audio frames in a WebM container.
//
// * M4A: M4A audio format.
message AutoDetectDecodingConfig {}

// Explicitly specified decoding parameters.
message ExplicitDecodingConfig {
  // Supported audio data encodings.
  enum AudioEncoding {
    // Default value. This value is unused.
    AUDIO_ENCODING_UNSPECIFIED = 0;

    // Headerless 16-bit signed little-endian PCM samples.
    LINEAR16 = 1;

    // Headerless 8-bit companded mulaw samples.
    MULAW = 2;

    // Headerless 8-bit companded alaw samples.
    ALAW = 3;
  }

  // Required. Encoding of the audio data sent for recognition.
  AudioEncoding encoding = 1 [(google.api.field_behavior) = REQUIRED];

  // Sample rate in Hertz of the audio data sent for recognition. Valid
  // values are: 8000-48000. 16000 is optimal. For best results, set the
  // sampling rate of the audio source to 16000 Hz. If that's not possible, use
  // the native sample rate of the audio source (instead of re-sampling).
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  int32 sample_rate_hertz = 2;

  // Number of channels present in the audio data sent for recognition.
  // Supported for the following encodings:
  //
  // * LINEAR16: Headerless 16-bit signed little-endian PCM samples.
  //
  // * MULAW: Headerless 8-bit companded mulaw samples.
  //
  // * ALAW: Headerless 8-bit companded alaw samples.
  //
  // The maximum allowed value is 8.
  int32 audio_channel_count = 3;
}
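
// A minimal illustration (textproto; values are arbitrary): headerless PCM at
// the optimal 16 kHz rate, mono.
//
//   encoding: LINEAR16
//   sample_rate_hertz: 16000
//   audio_channel_count: 1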

// Configuration to enable speaker diarization.
message SpeakerDiarizationConfig {
  // Required. Minimum number of speakers in the conversation. This range gives
  // you more flexibility by allowing the system to automatically determine the
  // correct number of speakers.
  //
  // To fix the number of speakers detected in the audio, set
  // `min_speaker_count` = `max_speaker_count`.
  int32 min_speaker_count = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Maximum number of speakers in the conversation. Valid values
  // are: 1-6. Must be >= `min_speaker_count`. This range gives you more
  // flexibility by allowing the system to automatically determine the correct
  // number of speakers.
  int32 max_speaker_count = 3 [(google.api.field_behavior) = REQUIRED];
}

// Available recognition features.
message RecognitionFeatures {
  // Options for how to recognize multi-channel audio.
  enum MultiChannelMode {
    // Default value for the multi-channel mode. If the audio contains
    // multiple channels, only the first channel will be transcribed; other
    // channels will be ignored.
    MULTI_CHANNEL_MODE_UNSPECIFIED = 0;

    // If selected, each channel in the provided audio is transcribed
    // independently. This cannot be selected if the selected
    // [model][google.cloud.speech.v2.Recognizer.model] is `latest_short`.
    SEPARATE_RECOGNITION_PER_CHANNEL = 1;
  }

  // If set to `true`, the server will attempt to filter out profanities,
  // replacing all but the initial character in each filtered word with
  // asterisks, for instance, "f***". If set to `false` or omitted, profanities
  // won't be filtered out.
  bool profanity_filter = 1;

  // If `true`, the top result includes a list of words and the start and end
  // time offsets (timestamps) for those words. If `false`, no word-level time
  // offset information is returned. The default is `false`.
  bool enable_word_time_offsets = 2;

  // If `true`, the top result includes a list of words and the confidence for
  // those words. If `false`, no word-level confidence information is returned.
  // The default is `false`.
  bool enable_word_confidence = 3;

  // If `true`, adds punctuation to recognition result hypotheses. This feature
  // is only available in select languages. The default `false` value does not
  // add punctuation to result hypotheses.
  bool enable_automatic_punctuation = 4;

  // The spoken punctuation behavior for the call. If `true`, replaces spoken
  // punctuation with the corresponding symbols in the request. For example,
  // "how are you question mark" becomes "how are you?". See
  // https://cloud.google.com/speech-to-text/docs/spoken-punctuation for
  // support. If `false`, spoken punctuation is not replaced.
  bool enable_spoken_punctuation = 14;

  // The spoken emoji behavior for the call. If `true`, adds spoken emoji
  // formatting for the request. This will replace spoken emojis with the
  // corresponding Unicode symbols in the final transcript. If `false`, spoken
  // emojis are not replaced.
  bool enable_spoken_emojis = 15;

  // Mode for recognizing multi-channel audio.
  MultiChannelMode multi_channel_mode = 17;

  // Configuration to enable speaker diarization and set additional
  // parameters to make diarization better suited for your application.
  // When this is enabled, we send all the words from the beginning of the
  // audio for the top alternative in every consecutive STREAMING response.
  // This is done in order to improve our speaker tags as our models learn to
  // identify the speakers in the conversation over time.
  // For non-streaming requests, the diarization results will be provided only
  // in the top alternative of the FINAL SpeechRecognitionResult.
  SpeakerDiarizationConfig diarization_config = 9;

  // Maximum number of recognition hypotheses to be returned.
  // The server may return fewer than `max_alternatives`.
  // Valid values are `0`-`30`. A value of `0` or `1` will return a maximum of
  // one. If omitted, will return a maximum of one.
  int32 max_alternatives = 16;
}
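
// For illustration only, a RecognitionFeatures message enabling word-level
// timestamps and diarization for a two-speaker call (values are placeholders):
//
//   enable_word_time_offsets: true
//   enable_automatic_punctuation: true
//   diarization_config { min_speaker_count: 2 max_speaker_count: 2 }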

// Transcription normalization configuration. Use transcription normalization
// to automatically replace parts of the transcript with phrases of your
// choosing. For StreamingRecognize, this normalization only applies to stable
// partial transcripts (stability > 0.8) and final transcripts.
message TranscriptNormalization {
  // A single replacement configuration.
  message Entry {
    // What to replace. Max length is 100 characters.
    string search = 1;

    // What to replace with. Max length is 100 characters.
    string replace = 2;

    // Whether the search is case sensitive.
    bool case_sensitive = 3;
  }

  // A list of replacement entries. We will perform replacement with one entry
  // at a time. For example, the second entry in ["cat" => "dog",
  // "mountain cat" => "mountain dog"] will never be applied because we will
  // always process the first entry before it. At most 100 entries.
  repeated Entry entries = 1;
}

// Translation configuration. Use to translate the given audio into text for
// the desired language.
message TranslationConfig {
  // Required. The language code to translate to.
  string target_language = 1 [(google.api.field_behavior) = REQUIRED];
}

// Provides "hints" to the speech recognizer to favor specific words and
// phrases in the results. PhraseSets can be specified as an inline resource,
// or a reference to an existing PhraseSet resource.
message SpeechAdaptation {
  // A biasing PhraseSet, which can be either a string referencing the name of
  // an existing PhraseSet resource, or an inline definition of a PhraseSet.
  message AdaptationPhraseSet {
    oneof value {
      // The name of an existing PhraseSet resource. The user must have read
      // access to the resource and it must not be deleted.
      string phrase_set = 1 [(google.api.resource_reference) = {
        type: "speech.googleapis.com/PhraseSet"
      }];

      // An inline defined PhraseSet.
      PhraseSet inline_phrase_set = 2;
    }
  }

  // A list of inline or referenced PhraseSets.
  repeated AdaptationPhraseSet phrase_sets = 1;

  // A list of inline CustomClasses. Existing CustomClass resources can be
  // referenced directly in a PhraseSet.
  repeated CustomClass custom_classes = 2;
}
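
// An illustrative SpeechAdaptation mixing a referenced PhraseSet with an
// inline one (textproto; the resource name is a placeholder, and the inline
// fields assume the PhraseSet message defined elsewhere in this package):
//
//   phrase_sets {
//     phrase_set: "projects/my-project/locations/global/phraseSets/my-set"
//   }
//   phrase_sets {
//     inline_phrase_set { phrases { value: "Cloud Speech" boost: 10 } }
//   }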

// Provides information to the Recognizer that specifies how to process the
// recognition request.
message RecognitionConfig {
  // Decoding parameters for audio being sent for recognition.
  oneof decoding_config {
    // Automatically detect decoding parameters.
    // Preferred for supported formats.
    AutoDetectDecodingConfig auto_decoding_config = 7;

    // Explicitly specified decoding parameters.
    // Required if using headerless PCM audio (linear16, mulaw, alaw).
    ExplicitDecodingConfig explicit_decoding_config = 8;
  }

  // Optional. Which model to use for recognition requests. Select the model
  // best suited to your domain to get best results.
  //
  // Guidance for choosing which model to use can be found in the
  // [Transcription Models
  // Documentation](https://cloud.google.com/speech-to-text/v2/docs/transcription-model)
  // and the models supported in each region can be found in the [Table Of
  // Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  string model = 9 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The language of the supplied audio as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language tag.
  // Language tags are normalized to BCP-47 before they are used, e.g. "en-us"
  // becomes "en-US".
  //
  // Supported languages for each model are listed in the [Table of Supported
  // Models](https://cloud.google.com/speech-to-text/v2/docs/speech-to-text-supported-languages).
  //
  // If additional languages are provided, recognition result will contain
  // recognition in the most likely language detected. The recognition result
  // will include the language tag of the language detected in the audio.
  repeated string language_codes = 10 [(google.api.field_behavior) = OPTIONAL];

  // Speech recognition features to enable.
  RecognitionFeatures features = 2;

  // Speech adaptation context that weights recognizer predictions for
  // specific words and phrases.
  SpeechAdaptation adaptation = 6;

  // Optional. Use transcription normalization to automatically replace parts
  // of the transcript with phrases of your choosing. For StreamingRecognize,
  // this normalization only applies to stable partial transcripts
  // (stability > 0.8) and final transcripts.
  TranscriptNormalization transcript_normalization = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration used to automatically run translation on the
  // given audio to the desired language for supported models.
  TranslationConfig translation_config = 15
      [(google.api.field_behavior) = OPTIONAL];
}
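
// A compact illustration (textproto; values are placeholders): automatic
// decoding plus an explicit model, language, and feature selection.
//
//   auto_decoding_config {}
//   model: "long"
//   language_codes: "en-US"
//   features { enable_automatic_punctuation: true }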

// Request message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method. Either
// `content` or `uri` must be supplied. Supplying both or neither returns
// [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]. See [content
// limits](https://cloud.google.com/speech-to-text/quotas#content).
message RecognizeRequest {
  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
  // {recognizer} segment may be set to `_` to use an empty implicit
  // Recognizer.
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.RecognizeRequest.config_mask] field
  // can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1;

  // The list of fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] that override the
  // values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.RecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.RecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 8;

  // The audio source, which is either inline content or a Google Cloud
  // Storage URI.
  oneof audio_source {
    // The audio data bytes encoded as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. As
    // with all bytes fields, proto buffers use a pure binary representation,
    // whereas JSON representations use base64.
    bytes content = 5;

    // URI that points to a file that contains audio data bytes as specified in
    // [RecognitionConfig][google.cloud.speech.v2.RecognitionConfig]. The file
    // must not be compressed (for example, gzip). Currently, only Google Cloud
    // Storage URIs are supported, which must be specified in the following
    // format: `gs://bucket_name/object_name` (other URI formats return
    // [INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For more
    // information, see [Request
    // URIs](https://cloud.google.com/storage/docs/reference-uris).
    string uri = 6;
  }
}
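
// An illustrative RecognizeRequest (textproto; resource and bucket names are
// placeholders) that overrides exactly one stored feature via `config_mask`:
//
//   recognizer: "projects/my-project/locations/global/recognizers/my-recognizer"
//   config { features { enable_word_time_offsets: true } }
//   config_mask { paths: "features.enable_word_time_offsets" }
//   uri: "gs://my-bucket/my-audio.wav"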

// Metadata about the recognition request and response.
message RecognitionResponseMetadata {
  // When available, billed audio seconds for the corresponding request.
  google.protobuf.Duration total_billed_duration = 6;
}

// Alternative hypotheses (a.k.a. n-best list).
message SpeechRecognitionAlternative {
  // Transcript text representing the words that the user spoke.
  string transcript = 1;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result, or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users
  // should not rely on it to be always provided. The default of 0.0 is a
  // sentinel value indicating `confidence` was not set.
  float confidence = 2;

  // A list of word-specific information for each recognized word.
  // When the
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is set, you will see all the words from the beginning of the audio.
  repeated WordInfo words = 3;
}

// Word-specific information for recognized words.
message WordInfo {
  // Time offset relative to the beginning of the audio,
  // and corresponding to the start of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration start_offset = 1;

  // Time offset relative to the beginning of the audio,
  // and corresponding to the end of the spoken word.
  // This field is only set if
  // [enable_word_time_offsets][google.cloud.speech.v2.RecognitionFeatures.enable_word_time_offsets]
  // is `true` and only in the top hypothesis. This is an experimental feature
  // and the accuracy of the time offset can vary.
  google.protobuf.Duration end_offset = 2;

  // The word corresponding to this set of information.
  string word = 3;

  // The confidence estimate between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. This field is set only for the top alternative of a non-streaming
  // result, or of a streaming result where
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final] is
  // set to `true`. This field is not guaranteed to be accurate and users
  // should not rely on it to be always provided. The default of 0.0 is a
  // sentinel value indicating `confidence` was not set.
  float confidence = 4;

  // A distinct label is assigned for every speaker within the audio. This
  // field specifies which one of those speakers was detected to have spoken
  // this word. `speaker_label` is set if
  // [SpeakerDiarizationConfig][google.cloud.speech.v2.SpeakerDiarizationConfig]
  // is given and only in the top alternative.
  string speaker_label = 6;
}

// A speech recognition result corresponding to a portion of the audio.
message SpeechRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel.
  // For `audio_channel_count` = `N`, its output values can range from `1` to
  // `N`.
  int32 channel_tag = 2;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected to have the most likelihood of being spoken in the audio.
  string language_code = 5 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Response message for the
// [Recognize][google.cloud.speech.v2.Speech.Recognize] method.
message RecognizeResponse {
  // Sequential list of transcription results corresponding to sequential
  // portions of audio.
  repeated SpeechRecognitionResult results = 3;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 2;
}

// Available recognition features specific to streaming recognition requests.
message StreamingRecognitionFeatures {
  // Events that a timeout can be set on for voice activity.
  message VoiceActivityTimeout {
    // Duration to timeout the stream if no speech begins. If this is set and
    // no speech is detected in this duration at the start of the stream, the
    // server will close the stream.
    google.protobuf.Duration speech_start_timeout = 1;

    // Duration to timeout the stream after speech ends. If this is set and no
    // speech is detected in this duration after speech was detected, the
    // server will close the stream.
    google.protobuf.Duration speech_end_timeout = 2;
  }

  // If `true`, responses with voice activity speech events will be returned as
  // they are detected.
  bool enable_voice_activity_events = 1;

  // Whether or not to stream interim results to the client. If set to true,
  // interim results will be streamed to the client. Otherwise, only the final
  // response will be streamed back.
  bool interim_results = 2;

  // If set, the server will automatically close the stream once the specified
  // duration has elapsed after the last VOICE_ACTIVITY speech event has been
  // sent. The field `enable_voice_activity_events` must also be set to true.
  VoiceActivityTimeout voice_activity_timeout = 3;
}
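
// Illustrative only: streaming features that surface interim results and let
// the server close the stream after five seconds of trailing silence (the
// durations are arbitrary):
//
//   interim_results: true
//   enable_voice_activity_events: true
//   voice_activity_timeout { speech_end_timeout { seconds: 5 } }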

// Provides configuration information for the StreamingRecognize request.
message StreamingRecognitionConfig {
  // Required. Features and audio metadata to use for the Automatic Speech
  // Recognition. This field in combination with the
  // [config_mask][google.cloud.speech.v2.StreamingRecognitionConfig.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] that
  // override the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all non-default valued fields in
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config] override
  // the values in the Recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // Recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.StreamingRecognitionConfig.config]
  // completely overrides and replaces the config in the recognizer for this
  // recognition request.
  google.protobuf.FieldMask config_mask = 3;

  // Speech recognition features to enable specific to streaming audio
  // recognition requests.
  StreamingRecognitionFeatures streaming_features = 2;
}

// Request message for the
// [StreamingRecognize][google.cloud.speech.v2.Speech.StreamingRecognize]
// method. Multiple
// [StreamingRecognizeRequest][google.cloud.speech.v2.StreamingRecognizeRequest]
// messages are sent in one call.
//
// If the [Recognizer][google.cloud.speech.v2.Recognizer] referenced by
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer]
// contains a fully specified request configuration then the stream may only
// contain messages with only
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] set.
//
// Otherwise the first message must contain a
// [recognizer][google.cloud.speech.v2.StreamingRecognizeRequest.recognizer] and
// a
// [streaming_config][google.cloud.speech.v2.StreamingRecognizeRequest.streaming_config]
// message that together fully specify the request configuration and must not
// contain [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio]. All
// subsequent messages must only have
// [audio][google.cloud.speech.v2.StreamingRecognizeRequest.audio] set.
message StreamingRecognizeRequest {
  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
  // {recognizer} segment may be set to `_` to use an empty implicit
  // Recognizer.
  string recognizer = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  oneof streaming_request {
    // StreamingRecognitionConfig to be used in this recognition attempt.
    // If provided, it will override the default RecognitionConfig stored in
    // the Recognizer.
    StreamingRecognitionConfig streaming_config = 6;

    // Inline audio bytes to be Recognized.
    // Maximum size for this field is 15 KB per request.
    bytes audio = 5;
  }
}
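
// A sketch (not normative) of the message sequence on one stream when the
// Recognizer does not already store a full configuration; names are
// placeholders and audio bytes are elided:
//
//   1. { recognizer: "projects/my-project/locations/global/recognizers/my-recognizer"
//        streaming_config { config { auto_decoding_config {} } } }
//   2. { audio: <first chunk> }
//   3. { audio: <next chunk> }
//   ...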

// Request message for the
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
// method.
message BatchRecognizeRequest {
  // Possible processing strategies for batch requests.
  enum ProcessingStrategy {
    // Default value for the processing strategy. The request is processed as
    // soon as it is received.
    PROCESSING_STRATEGY_UNSPECIFIED = 0;

    // If selected, processes the request during lower utilization periods for
    // a price discount. The request is fulfilled within 24 hours.
    DYNAMIC_BATCHING = 1;
  }

  // Required. The name of the Recognizer to use during recognition. The
  // expected format is
  // `projects/{project}/locations/{location}/recognizers/{recognizer}`. The
  // {recognizer} segment may be set to `_` to use an empty implicit
  // Recognizer.
  string recognizer = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/Recognizer"
    }
  ];

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.BatchRecognizeRequest.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource.
  RecognitionConfig config = 4;

  // The list of fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] that override
  // the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is provided,
  // all given fields in
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] override the
  // values in the recognizer for this recognition request. If a mask is
  // provided, only the fields listed in the mask override the config in the
  // recognizer for this recognition request. If a wildcard (`*`) is provided,
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] completely
  // overrides and replaces the config in the recognizer for this recognition
  // request.
  google.protobuf.FieldMask config_mask = 5;

  // Audio files with file metadata for ASR.
  // The maximum number of files allowed to be specified is 5.
  repeated BatchRecognizeFileMetadata files = 3;

  // Configuration options for where to output the transcripts of each file.
  RecognitionOutputConfig recognition_output_config = 6;

  // Processing strategy to use for this request.
  ProcessingStrategy processing_strategy = 7;
}
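
// For orientation (textproto; URIs are placeholders): a batch request that
// transcribes two files and writes results under a Cloud Storage prefix.
//
//   recognizer: "projects/my-project/locations/global/recognizers/my-recognizer"
//   files { uri: "gs://my-bucket/call-1.wav" }
//   files { uri: "gs://my-bucket/call-2.wav" }
//   recognition_output_config {
//     gcs_output_config { uri: "gs://my-bucket/transcripts" }
//   }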

// Output configurations for Cloud Storage.
message GcsOutputConfig {
  // The Cloud Storage URI prefix with which recognition results will be
  // written.
  string uri = 1;
}

// Output configurations for inline response.
message InlineOutputConfig {}

// Output configurations for serialized `BatchRecognizeResults` protos.
message NativeOutputFileFormatConfig {}

// Output configurations for [WebVTT](https://www.w3.org/TR/webvtt1/) formatted
// subtitle file.
message VttOutputFileFormatConfig {}

// Output configurations for [SubRip
// Text](https://www.matroska.org/technical/subtitles.html#srt-subtitles)
// formatted subtitle file.
message SrtOutputFileFormatConfig {}

// Configuration for the format of the results stored to `output`.
message OutputFormatConfig {
  // Configuration for the native output format. If this field is set or if no
  // other output format field is set then transcripts will be written to the
  // sink in the native format.
  NativeOutputFileFormatConfig native = 1;

  // Configuration for the vtt output format. If this field is set then
  // transcripts will be written to the sink in the vtt format.
  VttOutputFileFormatConfig vtt = 2;

  // Configuration for the srt output format. If this field is set then
  // transcripts will be written to the sink in the srt format.
  SrtOutputFileFormatConfig srt = 3;
}

// Configuration options for the output(s) of recognition.
message RecognitionOutputConfig {
  oneof output {
    // If this message is populated, recognition results are written to the
    // provided Google Cloud Storage URI.
    GcsOutputConfig gcs_output_config = 1;

    // If this message is populated, recognition results are provided in the
    // [BatchRecognizeResponse][google.cloud.speech.v2.BatchRecognizeResponse]
    // message of the Operation when completed. This is only supported when
    // calling [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize]
    // with just one audio file.
    InlineOutputConfig inline_response_config = 2;
  }

  // Optional. Configuration for the format of the results stored to `output`.
  // If unspecified transcripts will be written in the `NATIVE` format only.
  OutputFormatConfig output_format_config = 3
      [(google.api.field_behavior) = OPTIONAL];
}
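
// Illustrative only: write native transcripts plus SRT captions to Cloud
// Storage (the bucket name is a placeholder):
//
//   gcs_output_config { uri: "gs://my-bucket/transcripts" }
//   output_format_config { native {} srt {} }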

// Response message for
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize] that is
// packaged into a longrunning [Operation][google.longrunning.Operation].
message BatchRecognizeResponse {
  // Map from filename to the final result for that file.
  map<string, BatchRecognizeFileResult> results = 1;

  // When available, billed audio seconds for the corresponding request.
  google.protobuf.Duration total_billed_duration = 2;
}

// Output type for Cloud Storage of BatchRecognize transcripts. Though this
// proto isn't returned in this API anywhere, the Cloud Storage transcripts
// will be this proto serialized and should be parsed as such.
message BatchRecognizeResults {
  // Sequential list of transcription results corresponding to sequential
  // portions of audio.
  repeated SpeechRecognitionResult results = 1;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 2;
}

// Final results written to Cloud Storage.
message CloudStorageResult {
  // The Cloud Storage URI to which recognition results were written.
  string uri = 1;

  // The Cloud Storage URI to which recognition results were written as VTT
  // formatted captions. This is populated only when `VTT` output is
  // requested.
  string vtt_format_uri = 2;

  // The Cloud Storage URI to which recognition results were written as SRT
  // formatted captions. This is populated only when `SRT` output is
  // requested.
  string srt_format_uri = 3;
}

// Final results returned inline in the recognition response.
message InlineResult {
  // The transcript for the audio file.
  BatchRecognizeResults transcript = 1;

  // The transcript for the audio file as VTT formatted captions. This is
  // populated only when `VTT` output is requested.
  string vtt_captions = 2;

  // The transcript for the audio file as SRT formatted captions. This is
  // populated only when `SRT` output is requested.
  string srt_captions = 3;
}

// Final results for a single file.
message BatchRecognizeFileResult {
  // Error if one was encountered.
  google.rpc.Status error = 2;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 3;

  oneof result {
    // Recognition results written to Cloud Storage. This is populated only
    // when [GcsOutputConfig][google.cloud.speech.v2.GcsOutputConfig] is set
    // in the
    // [RecognitionOutputConfig][google.cloud.speech.v2.RecognitionOutputConfig].
    CloudStorageResult cloud_storage_result = 5;

    // Recognition results. This is populated only when
    // [InlineOutputConfig][google.cloud.speech.v2.InlineOutputConfig] is set
    // in the
    // [RecognitionOutputConfig][google.cloud.speech.v2.RecognitionOutputConfig].
    InlineResult inline_result = 6;
  }

  // Deprecated. Use `cloud_storage_result.uri` instead.
  string uri = 1 [deprecated = true];

  // Deprecated. Use `inline_result.transcript` instead.
  BatchRecognizeResults transcript = 4 [deprecated = true];
}

// Metadata about transcription for a single file (for example, progress
// percent).
message BatchRecognizeTranscriptionMetadata {
  // How much of the file has been transcribed so far.
  int32 progress_percent = 1;

  // Error if one was encountered.
  google.rpc.Status error = 2;

  // The Cloud Storage URI to which recognition results will be written.
  string uri = 3;
}

// Operation metadata for
// [BatchRecognize][google.cloud.speech.v2.Speech.BatchRecognize].
message BatchRecognizeMetadata {
  // Map from provided filename to the transcription metadata for that file.
  map<string, BatchRecognizeTranscriptionMetadata> transcription_metadata = 1;
}

// Metadata about a single file in a batch for BatchRecognize.
message BatchRecognizeFileMetadata {
  // The audio source, which is a Google Cloud Storage URI.
  oneof audio_source {
    // Cloud Storage URI for the audio file.
    string uri = 1;
  }

  // Features and audio metadata to use for the Automatic Speech Recognition.
  // This field in combination with the
  // [config_mask][google.cloud.speech.v2.BatchRecognizeFileMetadata.config_mask]
  // field can be used to override parts of the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the Recognizer resource as well as the
  // [config][google.cloud.speech.v2.BatchRecognizeRequest.config] at the
  // request level.
  RecognitionConfig config = 4;

  // The list of fields in
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config] that
  // override the values in the
  // [default_recognition_config][google.cloud.speech.v2.Recognizer.default_recognition_config]
  // of the recognizer during this recognition request. If no mask is
  // provided, all non-default valued fields in
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config]
  // override the values in the recognizer for this recognition request. If a
  // mask is provided, only the fields listed in the mask override the config
  // in the recognizer for this recognition request. If a wildcard (`*`) is
  // provided,
  // [config][google.cloud.speech.v2.BatchRecognizeFileMetadata.config]
  // completely overrides and replaces the config in the recognizer for this
  // recognition request.
  google.protobuf.FieldMask config_mask = 5;
}
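
// For illustration only: one `BatchRecognizeFileMetadata` entry, sketched in
// proto text format, that overrides just the recognition model for a single
// file. This assumes the `model` field of `RecognitionConfig` defined
// earlier in this file; the bucket and object names are placeholders.
//
//   uri: "gs://my-bucket/podcast.wav"
//   config { model: "latest_long" }
//   config_mask { paths: "model" }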

// A streaming speech recognition result corresponding to a portion of the
// audio that is currently being processed.
message StreamingRecognitionResult {
  // May contain one or more recognition hypotheses. These alternatives are
  // ordered in terms of accuracy, with the top (first) alternative being the
  // most probable, as ranked by the recognizer.
  repeated SpeechRecognitionAlternative alternatives = 1;

  // If `false`, this
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult]
  // represents an interim result that may change. If `true`, this is the
  // final time the speech service will return this particular
  // [StreamingRecognitionResult][google.cloud.speech.v2.StreamingRecognitionResult];
  // the recognizer will not return any further hypotheses for this portion
  // of the transcript and corresponding audio.
  bool is_final = 2;

  // An estimate of the likelihood that the recognizer will not change its
  // guess about this interim result. Values range from 0.0 (completely
  // unstable) to 1.0 (completely stable). This field is only provided for
  // interim results
  // ([is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`).
  // The default of 0.0 is a sentinel value indicating `stability` was not
  // set.
  float stability = 3;

  // Time offset of the end of this result relative to the beginning of the
  // audio.
  google.protobuf.Duration result_end_offset = 4;

  // For multi-channel audio, this is the channel number corresponding to the
  // recognized result for the audio from that channel. For
  // `audio_channel_count` = `N`, its output values can range from `1` to
  // `N`.
  int32 channel_tag = 5;

  // Output only. The [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt)
  // language tag of the language in this result. This language code was
  // detected to have the most likelihood of being spoken in the audio.
  string language_code = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// `StreamingRecognizeResponse` is the only message returned to the client by
// `StreamingRecognize`. A series of zero or more `StreamingRecognizeResponse`
// messages are streamed back to the client. If there is no recognizable
// audio then no messages are streamed back to the client.
//
// Here are some examples of `StreamingRecognizeResponse`s that might
// be returned while processing audio:
//
// 1. results { alternatives { transcript: "tube" } stability: 0.01 }
//
// 2. results { alternatives { transcript: "to be a" } stability: 0.01 }
//
// 3. results { alternatives { transcript: "to be" } stability: 0.9 }
//    results { alternatives { transcript: " or not to be" } stability: 0.01 }
//
// 4. results { alternatives { transcript: "to be or not to be"
//                             confidence: 0.92 }
//              alternatives { transcript: "to bee or not to bee" }
//    is_final: true }
//
// 5. results { alternatives { transcript: " that's" } stability: 0.01 }
//
// 6. results { alternatives { transcript: " that is" } stability: 0.9 }
//    results { alternatives { transcript: " the question" } stability: 0.01 }
//
// 7. results { alternatives { transcript: " that is the question"
//                             confidence: 0.98 }
//              alternatives { transcript: " that was the question" }
//    is_final: true }
//
// Notes:
//
// - Only two of the above responses, #4 and #7, contain final results; they
//   are indicated by `is_final: true`. Concatenating these together generates
//   the full transcript: "to be or not to be that is the question".
//
// - The others contain interim `results`. #3 and #6 contain two interim
//   `results`: the first portion has a high stability and is less likely to
//   change; the second portion has a low stability and is very likely to
//   change. A UI designer might choose to show only high stability `results`.
//
// - The specific `stability` and `confidence` values shown above are only
//   for illustrative purposes. Actual values may vary.
//
// - In each response, only one of these fields will be set: `error`,
//   `speech_event_type`, or one or more (repeated) `results`.
message StreamingRecognizeResponse {
  // Indicates the type of speech event.
  enum SpeechEventType {
    // No speech event specified.
    SPEECH_EVENT_TYPE_UNSPECIFIED = 0;

    // This event indicates that the server has detected the end of the
    // user's speech utterance and expects no additional speech. Therefore,
    // the server will not process additional audio and will close the gRPC
    // bidirectional stream. This event is only sent if there was a force
    // cutoff due to silence being detected early. This event is only
    // available through the `latest_short`
    // [model][google.cloud.speech.v2.Recognizer.model].
    END_OF_SINGLE_UTTERANCE = 1;

    // This event indicates that the server has detected the beginning of
    // human voice activity in the stream. This event can be returned
    // multiple times if speech starts and stops repeatedly throughout the
    // stream. This event is only sent if `voice_activity_events` is set to
    // true.
    SPEECH_ACTIVITY_BEGIN = 2;

    // This event indicates that the server has detected the end of human
    // voice activity in the stream. This event can be returned multiple
    // times if speech starts and stops repeatedly throughout the stream.
    // This event is only sent if `voice_activity_events` is set to true.
    SPEECH_ACTIVITY_END = 3;
  }

  // This repeated list contains zero or more results that correspond to
  // consecutive portions of the audio currently being processed. It contains
  // zero or one
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`true`
  // result (the newly settled portion), followed by zero or more
  // [is_final][google.cloud.speech.v2.StreamingRecognitionResult.is_final]=`false`
  // results (the interim results).
  repeated StreamingRecognitionResult results = 6;

  // Indicates the type of speech event.
  SpeechEventType speech_event_type = 3;

  // Time offset between the beginning of the audio and event emission.
  google.protobuf.Duration speech_event_offset = 7;

  // Metadata about the recognition.
  RecognitionResponseMetadata metadata = 5;
}
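
// For illustration only: a `StreamingRecognizeResponse` carrying a voice
// activity event rather than `results`, sketched in proto text format. Such
// responses are only emitted when `voice_activity_events` is enabled; the
// offset value is arbitrary.
//
//   speech_event_type: SPEECH_ACTIVITY_BEGIN
//   speech_event_offset { seconds: 1 nanos: 500000000 }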

// Message representing the config for the Speech-to-Text API. This includes
// an optional [KMS
// key](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with which
// incoming data will be encrypted.
message Config {
  option (google.api.resource) = {
    type: "speech.googleapis.com/Config"
    pattern: "projects/{project}/locations/{location}/config"
  };

  // Output only. Identifier. The name of the config resource. There is
  // exactly one config resource per project per location. The expected
  // format is `projects/{project}/locations/{location}/config`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IDENTIFIER
  ];

  // Optional. An optional [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) that,
  // if present, will be used to encrypt Speech-to-Text resources at rest.
  // Updating this key will not re-encrypt existing resources; only new
  // resources will be encrypted using this key. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for the
// [GetConfig][google.cloud.speech.v2.Speech.GetConfig] method.
message GetConfigRequest {
  // Required. The name of the config to retrieve. There is exactly one
  // config resource per project per location. The expected format is
  // `projects/{project}/locations/{location}/config`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "speech.googleapis.com/Config" }
  ];
}

// Request message for the
// [UpdateConfig][google.cloud.speech.v2.Speech.UpdateConfig] method.
message UpdateConfigRequest {
  // Required. The config to update.
  //
  // The config's `name` field is used to identify the config to be updated.
  // The expected format is `projects/{project}/locations/{location}/config`.
  Config config = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated.
  google.protobuf.FieldMask update_mask = 2;
}
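
// For illustration only: an `UpdateConfigRequest`, sketched in proto text
// format, that sets only the KMS key on a project's config. The project,
// key ring, and key names are placeholders.
//
//   config {
//     name: "projects/my-project/locations/us-central1/config"
//     kms_key_name: "projects/my-project/locations/us-central1/keyRings/my-ring/cryptoKeys/my-key"
//   }
//   update_mask { paths: "kms_key_name" }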

// CustomClass for biasing in speech recognition. Used to define a set of
// words or phrases that represents a common concept or theme likely to
// appear in your audio, for example a list of passenger ship names.
message CustomClass {
  option (google.api.resource) = {
    type: "speech.googleapis.com/CustomClass"
    pattern: "projects/{project}/locations/{location}/customClasses/{custom_class}"
    style: DECLARATIVE_FRIENDLY
  };

  // An item of the class.
  message ClassItem {
    // The class item's value.
    string value = 1;
  }

  // Set of states that define the lifecycle of a CustomClass.
  enum State {
    // Unspecified state. This is only used/useful for distinguishing unset
    // values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This CustomClass has been deleted.
    DELETED = 4;
  }

  // Output only. Identifier. The resource name of the CustomClass.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IDENTIFIER
  ];

  // Output only. System-assigned unique identifier for the CustomClass.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. User-settable, human-readable name for the CustomClass. Must
  // be 63 characters or less.
  string display_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // A collection of class items.
  repeated ClassItem items = 5;

  // Output only. The CustomClass lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Allows users to store small amounts of arbitrary data. Both
  // the key and the value must be 63 characters or less each. At most 100
  // annotations.
  map<string, string> annotations = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. This checksum is computed by the server based on the value
  // of other fields. This may be sent on update, undelete, and delete
  // requests to ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this CustomClass is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with
  // which the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the CustomClass is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}
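
// For illustration only: a `CustomClass`, sketched in proto text format,
// holding a short list of passenger ship names (the example concept from the
// comment above). Output-only fields are omitted because the server assigns
// them.
//
//   display_name: "passenger-ships"
//   items { value: "Queen Mary" }
//   items { value: "Queen Elizabeth 2" }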

// PhraseSet for biasing in speech recognition. A PhraseSet is used to
// provide "hints" to the speech recognizer to favor specific words and
// phrases in the results.
message PhraseSet {
  option (google.api.resource) = {
    type: "speech.googleapis.com/PhraseSet"
    pattern: "projects/{project}/locations/{location}/phraseSets/{phrase_set}"
    style: DECLARATIVE_FRIENDLY
  };

  // A Phrase contains words and phrase "hints" so that the speech
  // recognition is more likely to recognize them. This can be used to
  // improve the accuracy for specific words and phrases, for example, if
  // specific commands are typically spoken by the user. This can also be
  // used to add additional words to the vocabulary of the recognizer.
  //
  // List items can also include CustomClass references containing groups of
  // words that represent common concepts that occur in natural language.
  message Phrase {
    // The phrase itself.
    string value = 1;

    // Hint Boost. Overrides the boost set at the phrase set level. A
    // positive value will increase the probability that a specific phrase
    // will be recognized over other similar sounding phrases. The higher the
    // boost, the higher the chance of false positive recognition as well.
    // Negative boost values would correspond to anti-biasing, but
    // anti-biasing is not enabled, so negative boost values will return an
    // error. Boost values must be between 0 and 20; any values outside that
    // range will return an error. We recommend using a binary search
    // approach to finding the optimal value for your use case, as well as
    // adding phrases both with and without boost to your requests.
    float boost = 2;
  }

  // Set of states that define the lifecycle of a PhraseSet.
  enum State {
    // Unspecified state. This is only used/useful for distinguishing unset
    // values.
    STATE_UNSPECIFIED = 0;

    // The normal and active state.
    ACTIVE = 2;

    // This PhraseSet has been deleted.
    DELETED = 4;
  }

  // Output only. Identifier. The resource name of the PhraseSet.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = IDENTIFIER
  ];

  // Output only. System-assigned unique identifier for the PhraseSet.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // A list of words and phrases.
  repeated Phrase phrases = 3;

  // Hint Boost. A positive value will increase the probability that a
  // specific phrase will be recognized over other similar sounding phrases.
  // The higher the boost, the higher the chance of false positive
  // recognition as well. Valid `boost` values are between 0 (exclusive) and
  // 20. We recommend using a binary search approach to finding the optimal
  // value for your use case, as well as adding phrases both with and without
  // boost to your requests.
  float boost = 4;

  // User-settable, human-readable name for the PhraseSet. Must be 63
  // characters or less.
  string display_name = 5;

  // Output only. The PhraseSet lifecycle state.
  State state = 15 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Creation time.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The most recent time this resource was modified.
  google.protobuf.Timestamp update_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource was requested for deletion.
  google.protobuf.Timestamp delete_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time at which this resource will be purged.
  google.protobuf.Timestamp expire_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Allows users to store small amounts of arbitrary data. Both the key and
  // the value must be 63 characters or less each. At most 100 annotations.
  map<string, string> annotations = 10;

  // Output only. This checksum is computed by the server based on the value
  // of other fields. This may be sent on update, undelete, and delete
  // requests to ensure the client has an up-to-date value before proceeding.
  string etag = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether or not this PhraseSet is in the process of being
  // updated.
  bool reconciling = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [KMS key
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#keys) with
  // which the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}`.
  string kms_key_name = 13 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKey"
    }
  ];

  // Output only. The [KMS key version
  // name](https://cloud.google.com/kms/docs/resource-hierarchy#key_versions)
  // with which the PhraseSet is encrypted. The expected format is
  // `projects/{project}/locations/{location}/keyRings/{key_ring}/cryptoKeys/{crypto_key}/cryptoKeyVersions/{crypto_key_version}`.
  string kms_key_version_name = 14 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "cloudkms.googleapis.com/CryptoKeyVersion"
    }
  ];
}
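
// For illustration only: a `PhraseSet`, sketched in proto text format, with
// a default boost at the set level and a stronger per-phrase override (the
// values are arbitrary and should be tuned per use case, as recommended
// above).
//
//   display_name: "weather-terms"
//   boost: 5.0
//   phrases { value: "cumulonimbus" boost: 15.0 }
//   phrases { value: "barometric pressure" }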

// Request message for the
// [CreateCustomClass][google.cloud.speech.v2.Speech.CreateCustomClass]
// method.
message CreateCustomClassRequest {
  // Required. The CustomClass to create.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the CustomClass, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the CustomClass, which will become the final component
  // of the CustomClass's resource name.
  //
  // This value should be 4-63 characters, and valid characters are
  // /[a-z][0-9]-/.
  string custom_class_id = 3;

  // Required. The project and location where this CustomClass will be
  // created. The expected format is
  // `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/CustomClass"
    }
  ];
}

// Request message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
// method.
message ListCustomClassesRequest {
  // Required. The project and location of CustomClass resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // Number of results per request. A valid page_size ranges from 0 to 100
  // inclusive. If the page_size is zero or unspecified, a page size of 5
  // will be chosen. If the page size exceeds 100, it will be coerced down to
  // 100. Note that a call might return fewer results than the requested page
  // size.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
  // call. Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
  // must match the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListCustomClasses][google.cloud.speech.v2.Speech.ListCustomClasses]
// method.
message ListCustomClassesResponse {
  // The list of requested CustomClasses.
  repeated CustomClass custom_classes = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListCustomClassesRequest.page_token]
  // to retrieve the next page. If this field is omitted, there are no
  // subsequent pages. This token expires after 72 hours.
  string next_page_token = 2;
}
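
// For illustration only: paging through CustomClasses, sketched in proto
// text format. The first request names only the parent and a page size; each
// follow-up request repeats the same parameters and adds the token returned
// in the previous response's `next_page_token`. The parent is a placeholder.
//
//   First request:
//     parent: "projects/my-project/locations/global"
//     page_size: 50
//
//   Follow-up request:
//     parent: "projects/my-project/locations/global"
//     page_size: 50
//     page_token: "<next_page_token from the previous response>"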

// Request message for the
// [GetCustomClass][google.cloud.speech.v2.Speech.GetCustomClass] method.
message GetCustomClassRequest {
  // Required. The name of the CustomClass to retrieve. The expected format is
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];
}

// Request message for the
// [UpdateCustomClass][google.cloud.speech.v2.Speech.UpdateCustomClass]
// method.
message UpdateCustomClassRequest {
  // Required. The CustomClass to update.
  //
  // The CustomClass's `name` field is used to identify the CustomClass to
  // update. Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`.
  CustomClass custom_class = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to be updated. If empty, all fields are considered
  // for update.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated CustomClass, but do
  // not actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeleteCustomClass][google.cloud.speech.v2.Speech.DeleteCustomClass]
// method.
message DeleteCustomClassRequest {
  // Required. The name of the CustomClass to delete.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the deleted CustomClass, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the CustomClass is not found, the request will
  // succeed and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 3;
}

// Request message for the
// [UndeleteCustomClass][google.cloud.speech.v2.Speech.UndeleteCustomClass]
// method.
message UndeleteCustomClassRequest {
  // Required. The name of the CustomClass to undelete.
  // Format:
  // `projects/{project}/locations/{location}/customClasses/{custom_class}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/CustomClass"
    }
  ];

  // If set, validate the request and preview the undeleted CustomClass, but
  // do not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 4;
}

// Request message for the
// [CreatePhraseSet][google.cloud.speech.v2.Speech.CreatePhraseSet] method.
message CreatePhraseSetRequest {
  // Required. The PhraseSet to create.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // If set, validate the request and preview the PhraseSet, but do not
  // actually create it.
  bool validate_only = 2;

  // The ID to use for the PhraseSet, which will become the final component
  // of the PhraseSet's resource name.
  //
  // This value should be 4-63 characters, and valid characters are
  // /[a-z][0-9]-/.
  string phrase_set_id = 3;

  // Required. The project and location where this PhraseSet will be created.
  // The expected format is `projects/{project}/locations/{location}`.
  string parent = 4 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "speech.googleapis.com/PhraseSet"
    }
  ];
}
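
// For illustration only: a `CreatePhraseSetRequest`, sketched in proto text
// format. The `phrase_set_id` becomes the final component of the resource
// name, so the PhraseSet created here would be named
// `projects/my-project/locations/global/phraseSets/my-phrase-set`. All names
// and values are placeholders.
//
//   parent: "projects/my-project/locations/global"
//   phrase_set_id: "my-phrase-set"
//   phrase_set {
//     phrases { value: "weather" boost: 10.0 }
//   }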

// Request message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsRequest {
  // Required. The project and location of PhraseSet resources to list. The
  // expected format is `projects/{project}/locations/{location}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "locations.googleapis.com/Location"
    }
  ];

  // The maximum number of PhraseSets to return. The service may return fewer
  // than this value. If unspecified, at most 5 PhraseSets will be returned.
  // The maximum value is 100; values above 100 will be coerced to 100.
  int32 page_size = 2;

  // A page token, received from a previous
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] call.
  // Provide this to retrieve the subsequent page.
  //
  // When paginating, all other parameters provided to
  // [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] must
  // match the call that provided the page token.
  string page_token = 3;

  // Whether or not to show resources that have been deleted.
  bool show_deleted = 4;
}

// Response message for the
// [ListPhraseSets][google.cloud.speech.v2.Speech.ListPhraseSets] method.
message ListPhraseSetsResponse {
  // The list of requested PhraseSets.
  repeated PhraseSet phrase_sets = 1;

  // A token, which can be sent as
  // [page_token][google.cloud.speech.v2.ListPhraseSetsRequest.page_token] to
  // retrieve the next page. If this field is omitted, there are no
  // subsequent pages. This token expires after 72 hours.
  string next_page_token = 2;
}

// Request message for the
// [GetPhraseSet][google.cloud.speech.v2.Speech.GetPhraseSet] method.
message GetPhraseSetRequest {
  // Required. The name of the PhraseSet to retrieve. The expected format is
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];
}

// Request message for the
// [UpdatePhraseSet][google.cloud.speech.v2.Speech.UpdatePhraseSet] method.
message UpdatePhraseSetRequest {
  // Required. The PhraseSet to update.
  //
  // The PhraseSet's `name` field is used to identify the PhraseSet to
  // update. Format:
  // `projects/{project}/locations/{location}/phraseSets/{phrase_set}`.
  PhraseSet phrase_set = 1 [(google.api.field_behavior) = REQUIRED];

  // The list of fields to update. If empty, all non-default valued fields
  // are considered for update. Use `*` to update the entire PhraseSet
  // resource.
  google.protobuf.FieldMask update_mask = 2;

  // If set, validate the request and preview the updated PhraseSet, but do
  // not actually update it.
  bool validate_only = 4;
}

// Request message for the
// [DeletePhraseSet][google.cloud.speech.v2.Speech.DeletePhraseSet] method.
message DeletePhraseSetRequest {
  // Required. The name of the PhraseSet to delete.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the deleted PhraseSet, but do
  // not actually delete it.
  bool validate_only = 2;

  // If set to true, and the PhraseSet is not found, the request will succeed
  // and be a no-op (no Operation is recorded in this case).
  bool allow_missing = 4;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 3;
}
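
// For illustration only: a `DeletePhraseSetRequest`, sketched in proto text
// format, guarded by an etag so the delete fails if the resource has changed
// since it was last read. The name is a placeholder, and the etag stands in
// for a checksum copied from a recent GetPhraseSet response.
//
//   name: "projects/my-project/locations/global/phraseSets/my-phrase-set"
//   etag: "<etag from a recent GetPhraseSet response>"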

// Request message for the
// [UndeletePhraseSet][google.cloud.speech.v2.Speech.UndeletePhraseSet]
// method.
message UndeletePhraseSetRequest {
  // Required. The name of the PhraseSet to undelete.
  // Format: `projects/{project}/locations/{location}/phraseSets/{phrase_set}`
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "speech.googleapis.com/PhraseSet"
    }
  ];

  // If set, validate the request and preview the undeleted PhraseSet, but do
  // not actually undelete it.
  bool validate_only = 3;

  // This checksum is computed by the server based on the value of other
  // fields. This may be sent on update, undelete, and delete requests to
  // ensure the client has an up-to-date value before proceeding.
  string etag = 4;
}