// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dialogflow.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dialogflow/v2/audio_config.proto";
import "google/cloud/dialogflow/v2/context.proto";
import "google/cloud/dialogflow/v2/intent.proto";
import "google/cloud/dialogflow/v2/session_entity_type.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2";
option go_package = "cloud.google.com/go/dialogflow/apiv2/dialogflowpb;dialogflowpb";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2";
option objc_class_prefix = "DF";
option (google.api.resource_definition) = {
  type: "dialogflow.googleapis.com/Session"
  pattern: "projects/{project}/agent/sessions/{session}"
  pattern: "projects/{project}/agent/environments/{environment}/users/{user}/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/environments/{environment}/users/{user}/sessions/{session}"
};

// A service used for session interactions.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
service Sessions {
  option (google.api.default_host) = "dialogflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/dialogflow";

  // Processes a natural language query and returns structured, actionable data
  // as a result. This method is not idempotent, because it may cause contexts
  // and session entity types to be updated, which in turn might affect
  // results of future queries.
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [AnalyzeContent][google.cloud.dialogflow.v2.Participants.AnalyzeContent]
  // instead of `DetectIntent`. `AnalyzeContent` has additional
  // functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
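  //
  // The following is a minimal sketch of calling this method with a text
  // query through the `google-cloud-dialogflow` Python client. The project
  // and session IDs are placeholders, and error handling is omitted.
  //
  // ```
  // from google.cloud import dialogflow
  //
  // client = dialogflow.SessionsClient()
  // # Sessions are identified by a caller-chosen ID (36 characters max).
  // session = client.session_path("my-project", "my-session-id")
  //
  // query_input = dialogflow.QueryInput(
  //     text=dialogflow.TextInput(text="I want a large pizza",
  //                               language_code="en-US")
  // )
  // response = client.detect_intent(
  //     request={"session": session, "query_input": query_input}
  // )
  // print(response.query_result.intent.display_name)
  // print(response.query_result.fulfillment_text)
  // ```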
  rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
    option (google.api.http) = {
      post: "/v2/{session=projects/*/agent/sessions/*}:detectIntent"
      body: "*"
      additional_bindings {
        post: "/v2/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2/{session=projects/*/locations/*/agent/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2/{session=projects/*/locations/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "session,query_input";
  }

  // Processes a natural language query in audio format in a streaming fashion
  // and returns structured, actionable data as a result. This method is only
  // available via the gRPC API (not REST).
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent]
  // instead of `StreamingDetectIntent`. `StreamingAnalyzeContent` has
  // additional functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
      returns (stream StreamingDetectIntentResponse) {}
}

// The request to detect user's intent.
message DetectIntentRequest {
  // Required. The name of the session this query is sent to. Format:
  // `projects/<Project ID>/agent/sessions/<Session ID>`, or
  // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
  // default 'draft' environment (`Environment ID` might be referred to as
  // environment name at some places). If `User ID` is not specified, we are
  // using "-". It's up to the API caller to choose an appropriate `Session ID`
  // and `User ID`. They can be a random number or some type of user and session
  // identifiers (preferably hashed). The length of the `Session ID` and
  // `User ID` must not exceed 36 characters.
  //
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1. an audio config
  //    which instructs the speech recognizer how to process the speech audio,
  //
  // 2. a conversational query in the form of text, or
  //
  // 3. an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 4;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2.DetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at agent-level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2.DetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The natural language speech audio to be processed. This field
  // should be populated if and only if `query_input` is set to an input audio
  // config. A single request can contain up to 1 minute of speech audio data.
  bytes input_audio = 5;
}

// The message returned from the DetectIntent method.
message DetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 2;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 3;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 4;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Represents the parameters of the conversational query.
message QueryParameters {
  // The time zone of this conversational query from the
  // [time zone database](https://www.iana.org/time-zones), e.g.,
  // America/New_York, Europe/Paris. If not provided, the time zone specified in
  // agent settings is used.
  string time_zone = 1;

  // The geo location of this conversational query.
  google.type.LatLng geo_location = 2;

  // The collection of contexts to be activated before this query is
  // executed.
  repeated Context contexts = 3;

  // Specifies whether to delete all contexts in the current session
  // before the new ones are activated.
  bool reset_contexts = 4;

  // Additional session entity types to replace or extend developer
  // entity types with. The entity synonyms apply to all languages and persist
  // for the session of this query.
  repeated SessionEntityType session_entity_types = 5;

  // This field can be used to pass custom data to your webhook.
  // Arbitrary JSON objects are supported.
  // If supplied, the value is used to populate the
  // `WebhookRequest.original_detect_intent_request.payload`
  // field sent to your webhook.
  google.protobuf.Struct payload = 6;

  // Configures the type of sentiment analysis to perform. If not
  // provided, sentiment analysis is not performed.
  SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;

  // This field can be used to pass HTTP headers for a webhook
  // call. These headers will be sent to the webhook along with the headers that
  // have been configured through the Dialogflow web console. The headers
  // defined within this field will overwrite the headers configured through the
  // Dialogflow console if there is a conflict. Header names are
  // case-insensitive. Google's specified headers are not allowed, including
  // "Host", "Content-Length", "Connection", "From", "User-Agent",
  // "Accept-Encoding", "If-Modified-Since", "If-None-Match", "X-Forwarded-For",
  // etc.
  map<string, string> webhook_headers = 14;
}

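// The following is a minimal sketch of populating `QueryParameters`,
// including a webhook `payload`, with the `google-cloud-dialogflow` Python
// client; the payload keys and values are illustrative only.
//
// ```
// from google.cloud import dialogflow
// from google.protobuf import struct_pb2
//
// # Arbitrary JSON-like data forwarded to the webhook via
// # WebhookRequest.original_detect_intent_request.payload.
// payload = struct_pb2.Struct()
// payload.update({"user_tier": "premium", "cart_items": 2})
//
// query_params = dialogflow.QueryParameters(
//     time_zone="America/New_York",
//     payload=payload,
// )
// # Sent alongside query_input, e.g.:
// # client.detect_intent(request={"session": session,
// #                               "query_input": query_input,
// #                               "query_params": query_params})
// ```
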
// Represents the query input. It can contain either:
//
// 1. An audio config which
//    instructs the speech recognizer how to process the speech audio.
//
// 2. A conversational query in the form of text.
//
// 3. An event that specifies which intent to trigger.
message QueryInput {
  // Required. The input specification.
  oneof input {
    // Instructs the speech recognizer how to process the speech audio.
    InputAudioConfig audio_config = 1;

    // The natural language text to be processed. Text length must not exceed
    // 256 characters for virtual agent interactions.
    TextInput text = 2;

    // The event to be processed.
    EventInput event = 3;
  }
}

// Represents the result of conversational query or event processing.
message QueryResult {
  // The original conversational query text:
  //
  // - If natural language text was provided as input, `query_text` contains
  //   a copy of the input.
  // - If natural language speech audio was provided as input, `query_text`
  //   contains the speech recognition result. If speech recognizer produced
  //   multiple alternatives, a particular one is picked.
  // - If automatic spell correction is enabled, `query_text` will contain the
  //   corrected user input.
  string query_text = 1;

  // The language that was triggered during intent detection.
  // See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes.
  string language_code = 15;

  // The Speech recognition confidence between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. The default of 0.0 is a sentinel value indicating that confidence
  // was not set.
  //
  // This field is not guaranteed to be accurate or set. In particular this
  // field isn't set for StreamingDetectIntent since the streaming endpoint has
  // separate confidence estimates per portion of the audio in
  // StreamingRecognitionResult.
  float speech_recognition_confidence = 2;

  // The action name from the matched intent.
  string action = 3;

  // The collection of extracted parameters.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // - MapKey type: string
  // - MapKey value: parameter name
  // - MapValue type:
  //   - If parameter's entity type is a composite entity: map
  //   - Else: depending on parameter value type, could be one of string,
  //     number, boolean, null, list or map
  // - MapValue value:
  //   - If parameter's entity type is a composite entity:
  //     map from composite entity property names to property values
  //   - Else: parameter value
  google.protobuf.Struct parameters = 4;

  // This field is set to:
  //
  // - `false` if the matched intent has required parameters and not all of
  //   the required parameter values have been collected.
  // - `true` if all required parameter values have been collected, or if the
  //   matched intent doesn't contain any required parameters.
  bool all_required_params_present = 5;

  // Indicates whether the conversational query triggers a cancellation for slot
  // filling. For more information, see the [cancel slot filling
  // documentation](https://cloud.google.com/dialogflow/es/docs/intents-actions-parameters#cancel).
  bool cancels_slot_filling = 21;

  // The text to be pronounced to the user or shown on the screen.
  // Note: This is a legacy field, `fulfillment_messages` should be preferred.
  string fulfillment_text = 6;

  // The collection of rich messages to present to the user.
  repeated Intent.Message fulfillment_messages = 7;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `source` field returned in the webhook response.
  string webhook_source = 8;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `payload` field returned in the webhook response.
  google.protobuf.Struct webhook_payload = 9;

  // The collection of output contexts. If applicable,
  // `output_contexts.parameters` contains entries with name
  // `<parameter name>.original` containing the original parameter values
  // before the query.
  repeated Context output_contexts = 10;

  // The intent that matched the conversational query. Some, but not all,
  // fields are filled in this message, including but not limited to:
  // `name`, `display_name`, `end_interaction` and `is_fallback`.
  Intent intent = 11;

  // The intent detection confidence. Values range from 0.0
  // (completely uncertain) to 1.0 (completely certain).
  // This value is for informational purposes only and is only used to
  // help match the best intent within the classification threshold.
  // This value may change for the same end-user expression at any time due to a
  // model retraining or change in implementation.
  // If there are multiple `knowledge_answers` messages, this value is set to
  // the greatest `knowledgeAnswers.match_confidence` value in the list.
  float intent_detection_confidence = 12;

  // Free-form diagnostic information for the associated detect intent request.
  // The fields of this data can change without notice, so you should not write
  // code that depends on its structure.
  // The data may contain:
  //
  // - webhook call latency
  // - webhook errors
  google.protobuf.Struct diagnostic_info = 14;

  // The sentiment analysis result, which depends on the
  // `sentiment_analysis_request_config` specified in the request.
  SentimentAnalysisResult sentiment_analysis_result = 17;
}

// The top-level message sent by the client to the
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2.Sessions.StreamingDetectIntent]
// method.
//
// Multiple request messages should be sent in order:
//
// 1.  The first message must contain
//     [session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session],
//     [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
//     plus optionally
//     [query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params].
//     If the client wants to receive an audio response, it should also contain
//     [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config].
//     The message must not contain
//     [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio].
// 2.  If
//     [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
//     was set to
//     [query_input.audio_config][google.cloud.dialogflow.v2.InputAudioConfig],
//     all subsequent messages must contain
//     [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio]
//     to continue with Speech recognition. If you decide to detect an intent
//     from text input instead, after you have already started Speech
//     recognition, send a message with
//     [query_input.text][google.cloud.dialogflow.v2.QueryInput.text].
//
//     However, note that:
//
//     * Dialogflow will bill you for the audio duration so far.
//     * Dialogflow discards all Speech recognition results in favor of the
//       input text.
//     * Dialogflow will use the language code from the first message.
//
// After you have sent all input, you must half-close or abort the request
// stream.
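//
// The following is a minimal sketch of this request ordering with the
// `google-cloud-dialogflow` Python client; the audio file name, encoding,
// and sample rate are placeholder assumptions.
//
// ```
// from google.cloud import dialogflow
//
// def audio_chunks(path, size=4096):
//     with open(path, "rb") as f:
//         while chunk := f.read(size):
//             yield chunk
//
// def requests(session, path):
//     # 1. First message: session and query_input, but no input_audio.
//     audio_config = dialogflow.InputAudioConfig(
//         audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//         sample_rate_hertz=16000,
//         language_code="en-US",
//     )
//     yield dialogflow.StreamingDetectIntentRequest(
//         session=session,
//         query_input=dialogflow.QueryInput(audio_config=audio_config),
//     )
//     # 2. Subsequent messages: audio only.
//     for chunk in audio_chunks(path):
//         yield dialogflow.StreamingDetectIntentRequest(input_audio=chunk)
//     # Exhausting the generator half-closes the request stream.
//
// client = dialogflow.SessionsClient()
// session = client.session_path("my-project", "my-session-id")
// responses = client.streaming_detect_intent(
//     requests=requests(session, "utterance.wav")
// )
// ```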
message StreamingDetectIntentRequest {
  // Required. The name of the session the query is sent to.
  // Format of the session name:
  // `projects/<Project ID>/agent/sessions/<Session ID>`, or
  // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
  // default 'draft' environment. If `User ID` is not specified, we are using
  // "-". It's up to the API caller to choose an appropriate `Session ID` and
  // `User ID`. They can be a random number or some type of user and session
  // identifiers (preferably hashed). The length of the `Session ID` and
  // `User ID` must not exceed 36 characters.
  //
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1. an audio config which instructs the speech recognizer how to process
  //    the speech audio,
  //
  // 2. a conversational query in the form of text, or
  //
  // 3. an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Please use
  // [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2.InputAudioConfig.single_utterance]
  // instead. If `false` (default), recognition does not cease until the client
  // closes the stream. If `true`, the recognizer will detect a single spoken
  // utterance in input audio. Recognition ceases when it detects the audio's
  // voice has stopped or paused. In this case, once a detected intent is
  // received, the client should close the stream and start a new request with a
  // new stream as needed. This setting is ignored when `query_input` is a piece
  // of text or an event.
  bool single_utterance = 4 [deprecated = true];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 5;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at agent-level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The input audio content to be recognized. Must be sent if
  // `query_input` was set to a streaming input audio config. The complete audio
  // over all streaming messages must not exceed 1 minute.
  bytes input_audio = 6;

  // If true, `StreamingDetectIntentResponse.debugging_info` will get populated.
  bool enable_debugging_info = 8;
}

// Cloud conversation info for easier debugging.
// It will get populated in `StreamingDetectIntentResponse` or
// `StreamingAnalyzeContentResponse` when the flag `enable_debugging_info` is
// set to true in corresponding requests.
message CloudConversationDebuggingInfo {
  // Number of input audio data chunks in streaming requests.
  int32 audio_data_chunks = 1;

  // Time offset of the end of speech utterance relative to the
  // beginning of the first audio chunk.
  google.protobuf.Duration result_end_time_offset = 2;

  // Duration of first audio chunk.
  google.protobuf.Duration first_audio_duration = 3;

  // Whether client used single utterance mode.
  bool single_utterance = 5;

  // Time offsets of the speech partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration speech_partial_results_end_times = 6;

  // Time offsets of the speech final results (is_final=true) relative to the
  // beginning of the stream.
  repeated google.protobuf.Duration speech_final_results_end_times = 7;

  // Total number of partial responses.
  int32 partial_responses = 8;

  // Time offset of Speaker ID stream close time relative to the Speech stream
  // close time in milliseconds. Only meaningful for conversations involving
  // passive verification.
  int32 speaker_id_passive_latency_ms_offset = 9;

  // Whether a barge-in event is triggered in this request.
  bool bargein_event_triggered = 10;

  // Whether speech uses single utterance mode.
  bool speech_single_utterance = 11;

  // Time offsets of the DTMF partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_partial_results_times = 12;

  // Time offsets of the DTMF final results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_final_results_times = 13;

  // Time offset of the end-of-single-utterance signal relative to the
  // beginning of the stream.
  google.protobuf.Duration single_utterance_end_time_offset = 14;

  // No speech timeout settings observed at runtime.
  google.protobuf.Duration no_speech_timeout = 15;

  // Whether the streaming terminates with an injected text query.
  bool is_input_text = 16;

  // Client half close time in terms of input audio duration.
  google.protobuf.Duration client_half_close_time_offset = 17;

  // Client half close time in terms of API streaming duration.
  google.protobuf.Duration client_half_close_streaming_time_offset = 18;
}

// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1.  If the `StreamingDetectIntentRequest.input_audio` field was
//     set, the `recognition_result` field is populated for one
//     or more messages.
//     See the
//     [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult]
//     message for details about the result message sequence.
//
// 2.  The next message contains `response_id`, `query_result`
//     and optionally `webhook_status` if a WebHook was called.
message StreamingDetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The result of speech recognition.
  StreamingRecognitionResult recognition_result = 2;

  // The result of the conversational query or event processing.
  QueryResult query_result = 3;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 4;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 5;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;

  // Debugging info that would get populated when
  // `StreamingDetectIntentRequest.enable_debugging_info` is set to true.
  CloudConversationDebuggingInfo debugging_info = 8;
}

// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// While end-user audio is being processed, Dialogflow sends a series of
// results. Each result may contain a `transcript` value. A transcript
// represents a portion of the utterance. While the recognizer is processing
// audio, transcript values may be interim values or finalized values.
// Once a transcript is finalized, the `is_final` value is set to true and
// processing continues for the next transcript.
//
// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
// was true, and the recognizer has completed processing audio,
// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
// following (last) result contains the last finalized transcript.
//
// The complete end-user utterance is determined by concatenating the
// finalized transcript values received for the series of results.
//
// In the following example, single utterance is enabled. In the case where
// single utterance is not enabled, result 7 would not occur.
//
// ```
// Num | transcript              | message_type            | is_final
// --- | ----------------------- | ----------------------- | --------
// 1   | "tube"                  | TRANSCRIPT              | false
// 2   | "to be a"               | TRANSCRIPT              | false
// 3   | "to be"                 | TRANSCRIPT              | false
// 4   | "to be or not to be"    | TRANSCRIPT              | true
// 5   | "that's"                | TRANSCRIPT              | false
// 6   | "that is"               | TRANSCRIPT              | false
// 7   | unset                   | END_OF_SINGLE_UTTERANCE | unset
// 8   | " that is the question" | TRANSCRIPT              | true
// ```
//
// Concatenating the finalized transcripts with `is_final` set to true,
// the complete utterance becomes "to be or not to be that is the question".
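//
// A sketch of assembling the utterance as described above, assuming the
// `google-cloud-dialogflow` Python client and the response iterator returned
// by `streaming_detect_intent` (see the StreamingDetectIntentRequest
// example):
//
// ```
// from google.cloud import dialogflow
//
// MessageType = dialogflow.StreamingRecognitionResult.MessageType
//
// def collect_utterance(responses):
//     finalized = []
//     for response in responses:
//         result = response.recognition_result
//         if result.message_type == MessageType.TRANSCRIPT and result.is_final:
//             # Keep only finalized transcripts; interim ones may change.
//             finalized.append(result.transcript)
//         elif result.message_type == MessageType.END_OF_SINGLE_UTTERANCE:
//             # Stop sending audio; the remaining responses still carry the
//             # last finalized transcript and the query_result.
//             pass
//     return "".join(finalized)
// ```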
message StreamingRecognitionResult {
  // Type of the response message.
  enum MessageType {
    // Not specified. Should never be used.
    MESSAGE_TYPE_UNSPECIFIED = 0;

    // Message contains a (possibly partial) transcript.
    TRANSCRIPT = 1;

    // Event indicates that the server has detected the end of the user's
    // speech utterance and expects no additional inputs.
    // Therefore, the server will not process additional audio (although it may
    // subsequently return additional results). The client should stop sending
    // additional audio data, half-close the gRPC connection, and wait for any
    // additional results until the server closes the gRPC connection. This
    // message is only sent if `single_utterance` was set to `true`, and is not
    // used otherwise.
    END_OF_SINGLE_UTTERANCE = 2;
  }

  // Type of the result message.
  MessageType message_type = 1;

  // Transcript text representing the words that the user spoke.
  // Populated if and only if `message_type` = `TRANSCRIPT`.
  string transcript = 2;

  // If `false`, the `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, the recognizer will not return
  // any further hypotheses about this piece of the audio. May only be populated
  // for `message_type` = `TRANSCRIPT`.
  bool is_final = 3;

  // The Speech confidence between 0.0 and 1.0 for the current portion of audio.
  // A higher number indicates an estimated greater likelihood that the
  // recognized words are correct. The default of 0.0 is a sentinel value
  // indicating that confidence was not set.
  //
  // This field is typically only provided if `is_final` is true and you should
  // not rely on it being accurate or even set.
  float confidence = 4;

  // Word-specific information for the words recognized by Speech in
  // [transcript][google.cloud.dialogflow.v2.StreamingRecognitionResult.transcript].
  // Populated if and only if `message_type` = `TRANSCRIPT` and
  // [InputAudioConfig.enable_word_info] is set.
  repeated SpeechWordInfo speech_word_info = 7;

  // Time offset of the end of this Speech recognition result relative to the
  // beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`.
  google.protobuf.Duration speech_end_offset = 8;

  // Detected language code for the transcript.
  string language_code = 10;
}

// ============================================================================
//   Auxiliary proto messages.
//
// Represents the natural language text to be processed.
message TextInput {
  // Required. The UTF-8 encoded natural language text to be processed.
  // Text length must not exceed 256 characters for virtual agent interactions.
  string text = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 2 [(google.api.field_behavior) = REQUIRED];
}

// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
  // Required. The unique identifier of the event.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // The collection of parameters associated with the event.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // - MapKey type: string
  // - MapKey value: parameter name
  // - MapValue type:
  //   - If parameter's entity type is a composite entity: map
  //   - Else: depending on parameter value type, could be one of string,
  //     number, boolean, null, list or map
  // - MapValue value:
  //   - If parameter's entity type is a composite entity:
  //     map from composite entity property names to property values
  //   - Else: parameter value
  google.protobuf.Struct parameters = 2;

  // Required. The language of this query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  //
  // This field is ignored when used in the context of a
  // [WebhookResponse.followup_event_input][google.cloud.dialogflow.v2.WebhookResponse.followup_event_input]
  // field, because the language was already defined in the originating detect
  // intent request.
  string language_code = 3 [(google.api.field_behavior) = REQUIRED];
}

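// The following is a minimal sketch of triggering an intent by event rather
// than by text, assuming the `google-cloud-dialogflow` Python client; the
// event name and parameter come from the example above, and `client` and
// `session` are built as in the DetectIntent example.
//
// ```
// from google.cloud import dialogflow
// from google.protobuf import struct_pb2
//
// params = struct_pb2.Struct()
// params.update({"name": "Sam"})
//
// query_input = dialogflow.QueryInput(
//     event=dialogflow.EventInput(
//         name="welcome_event",
//         parameters=params,
//         language_code="en-US",
//     )
// )
// response = client.detect_intent(
//     request={"session": session, "query_input": query_input}
// )
// ```
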
// Configures the types of sentiment analysis to perform.
message SentimentAnalysisRequestConfig {
  // Instructs the service to perform sentiment analysis on
  // `query_text`. If not provided, sentiment analysis is not performed on
  // `query_text`.
  bool analyze_query_text_sentiment = 1;
}

// The result of sentiment analysis. Sentiment analysis inspects user input
// and identifies the prevailing subjective opinion, especially to determine a
// user's attitude as positive, negative, or neutral.
// For [Participants.DetectIntent][], it needs to be configured in
// [DetectIntentRequest.query_params][google.cloud.dialogflow.v2.DetectIntentRequest.query_params].
// For [Participants.StreamingDetectIntent][], it needs to be configured in
// [StreamingDetectIntentRequest.query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params].
// And for
// [Participants.AnalyzeContent][google.cloud.dialogflow.v2.Participants.AnalyzeContent]
// and
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent],
// it needs to be configured in
// [ConversationProfile.human_agent_assistant_config][google.cloud.dialogflow.v2.ConversationProfile.human_agent_assistant_config]
message SentimentAnalysisResult {
  // The sentiment analysis result for `query_text`.
  Sentiment query_text_sentiment = 1;
}

// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text. See:
// https://cloud.google.com/natural-language/docs/basics#interpreting_sentiment_analysis_values
// for how to interpret the result.
message Sentiment {
  // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
  // sentiment).
  float score = 1;

  // A non-negative number in the [0, +inf) range, which represents the absolute
  // magnitude of sentiment, regardless of score (positive or negative).
  float magnitude = 2;
}
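
// A minimal sketch of enabling sentiment analysis on `query_text` and
// reading the result, assuming the `google-cloud-dialogflow` Python client
// with `client`, `session`, and `query_input` built as in the DetectIntent
// example:
//
// ```
// from google.cloud import dialogflow
//
// query_params = dialogflow.QueryParameters(
//     sentiment_analysis_request_config=dialogflow.SentimentAnalysisRequestConfig(
//         analyze_query_text_sentiment=True
//     )
// )
// response = client.detect_intent(
//     request={"session": session,
//              "query_input": query_input,
//              "query_params": query_params}
// )
// sentiment = response.query_result.sentiment_analysis_result.query_text_sentiment
// print(f"score={sentiment.score:.2f} magnitude={sentiment.magnitude:.2f}")
// ```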