// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dialogflow.v2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dialogflow/v2/audio_config.proto";
import "google/cloud/dialogflow/v2/context.proto";
import "google/cloud/dialogflow/v2/intent.proto";
import "google/cloud/dialogflow/v2/session_entity_type.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2";
option go_package = "cloud.google.com/go/dialogflow/apiv2/dialogflowpb;dialogflowpb";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2";
option objc_class_prefix = "DF";
option (google.api.resource_definition) = {
  type: "dialogflow.googleapis.com/Session"
  pattern: "projects/{project}/agent/sessions/{session}"
  pattern: "projects/{project}/agent/environments/{environment}/users/{user}/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/environments/{environment}/users/{user}/sessions/{session}"
};

// A service used for session interactions.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
service Sessions {
  option (google.api.default_host) = "dialogflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/dialogflow";

  // Processes a natural language query and returns structured, actionable data
  // as a result. This method is not idempotent, because it may cause contexts
  // and session entity types to be updated, which in turn might affect
  // results of future queries.
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [AnalyzeContent][google.cloud.dialogflow.v2.Participants.AnalyzeContent]
  // instead of `DetectIntent`. `AnalyzeContent` has additional
  // functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
    option (google.api.http) = {
      post: "/v2/{session=projects/*/agent/sessions/*}:detectIntent"
      body: "*"
      additional_bindings {
        post: "/v2/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2/{session=projects/*/locations/*/agent/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2/{session=projects/*/locations/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "session,query_input";
  }

  // Processes a natural language query in audio format in a streaming fashion
  // and returns structured, actionable data as a result. This method is only
  // available via the gRPC API (not REST).
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent]
  // instead of `StreamingDetectIntent`. `StreamingAnalyzeContent` has
  // additional functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
      returns (stream StreamingDetectIntentResponse) {}
}

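// Illustrative example (not part of the API definition): a minimal REST call
// against the first `DetectIntent` HTTP binding declared above. The project
// name "my-project", session ID "123456789", query text, and language code
// are hypothetical placeholders; the field names follow the proto3 JSON
// mapping of `DetectIntentRequest` below.
//
//   POST https://dialogflow.googleapis.com/v2/projects/my-project/agent/sessions/123456789:detectIntent
//
//   {
//     "queryInput": {
//       "text": {
//         "text": "book a meeting room",
//         "languageCode": "en-US"
//       }
//     }
//   }
//
// The response is a `DetectIntentResponse` whose `queryResult` is populated
// as described below.
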
// The request to detect user's intent.
message DetectIntentRequest {
  // Required. The name of the session this query is sent to. Format:
  // `projects/<Project ID>/agent/sessions/<Session ID>`, or
  // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
  // the default 'draft' environment (`Environment ID` might be referred to as
  // the environment name in some places). If `User ID` is not specified, we use
  // "-". It's up to the API caller to choose an appropriate `Session ID` and
  // `User ID`. They can be a random number or some type of user and session
  // identifiers (preferably hashed). The length of the `Session ID` and
  // `User ID` must not exceed 36 characters.
  //
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config
  //     which instructs the speech recognizer how to process the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and the agent-level speech synthesizer is
  // not configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 4;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2.DetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at the agent level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2.DetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The natural language speech audio to be processed. This field should be
  // populated if and only if `query_input` is set to an input audio config.
  // A single request can contain up to 1 minute of speech audio data.
  bytes input_audio = 5;
}

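// Illustrative `session` values for the request above. The project, location,
// environment, and session IDs are hypothetical placeholders; the shapes
// follow the resource patterns declared at the top of this file:
//
//   projects/my-project/agent/sessions/123456789
//   projects/my-project/agent/environments/staging/users/-/sessions/123456789
//   projects/my-project/locations/us-central1/agent/sessions/123456789
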
// The message returned from the DetectIntent method.
message DetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 2;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 3;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 4;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Represents the parameters of the conversational query.
message QueryParameters {
  // The time zone of this conversational query from the
  // [time zone database](https://www.iana.org/time-zones), e.g.,
  // America/New_York, Europe/Paris. If not provided, the time zone specified in
  // agent settings is used.
  string time_zone = 1;

  // The geo location of this conversational query.
  google.type.LatLng geo_location = 2;

  // The collection of contexts to be activated before this query is
  // executed.
  repeated Context contexts = 3;

  // Specifies whether to delete all contexts in the current session
  // before the new ones are activated.
  bool reset_contexts = 4;

  // Additional session entity types to replace or extend developer
  // entity types with. The entity synonyms apply to all languages and persist
  // for the session of this query.
  repeated SessionEntityType session_entity_types = 5;

  // This field can be used to pass custom data to your webhook.
  // Arbitrary JSON objects are supported.
  // If supplied, the value is used to populate the
  // `WebhookRequest.original_detect_intent_request.payload`
  // field sent to your webhook.
  google.protobuf.Struct payload = 6;

  // Configures the type of sentiment analysis to perform. If not
  // provided, sentiment analysis is not performed.
  SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;

  // This field can be used to pass HTTP headers for a webhook
  // call. These headers will be sent to the webhook along with the headers that
  // have been configured through the Dialogflow web console. The headers
  // defined within this field will overwrite the headers configured through the
  // Dialogflow console if there is a conflict. Header names are
  // case-insensitive. Google's specified headers are not allowed, including
  // "Host", "Content-Length", "Connection", "From", "User-Agent",
  // "Accept-Encoding", "If-Modified-Since", "If-None-Match", "X-Forwarded-For",
  // and so on.
  map<string, string> webhook_headers = 14;
}

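// Illustrative `QueryParameters` value in proto3 JSON form. All values are
// hypothetical placeholders, shown only to relate the fields above; the field
// names follow the standard proto3 JSON mapping:
//
//   {
//     "timeZone": "America/New_York",
//     "payload": {"source": "my-kiosk-frontend"},
//     "webhookHeaders": {"X-My-Auth": "token-placeholder"},
//     "sentimentAnalysisRequestConfig": {"analyzeQueryTextSentiment": true}
//   }
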
// Represents the query input. It can contain one of:
//
// 1.  An audio config which
//     instructs the speech recognizer how to process the speech audio.
//
// 2.  A conversational query in the form of text.
//
// 3.  An event that specifies which intent to trigger.
message QueryInput {
  // Required. The input specification.
  oneof input {
    // Instructs the speech recognizer how to process the speech audio.
    InputAudioConfig audio_config = 1;

    // The natural language text to be processed. Text length must not exceed
    // 256 characters for virtual agent interactions.
    TextInput text = 2;

    // The event to be processed.
    EventInput event = 3;
  }
}

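// Illustrative `QueryInput` values in proto3 JSON form, one per variant of the
// `input` oneof above. The text, event name, and language code are
// placeholders, and the audio config fields are assumed to come from
// `InputAudioConfig` in audio_config.proto:
//
//   {"text": {"text": "I want a large pizza", "languageCode": "en-US"}}
//
//   {"event": {"name": "welcome_event", "languageCode": "en-US"}}
//
//   {"audioConfig": {"audioEncoding": "AUDIO_ENCODING_LINEAR_16",
//                    "sampleRateHertz": 16000, "languageCode": "en-US"}}
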
// Represents the result of conversational query or event processing.
message QueryResult {
  // The original conversational query text:
  //
  // - If natural language text was provided as input, `query_text` contains
  //   a copy of the input.
  // - If natural language speech audio was provided as input, `query_text`
  //   contains the speech recognition result. If the speech recognizer produced
  //   multiple alternatives, a particular one is picked.
  // - If automatic spell correction is enabled, `query_text` will contain the
  //   corrected user input.
  string query_text = 1;

  // The language that was triggered during intent detection.
  // See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes.
  string language_code = 15;

  // The Speech recognition confidence between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. The default of 0.0 is a sentinel value indicating that confidence
  // was not set.
  //
  // This field is not guaranteed to be accurate or set. In particular this
  // field isn't set for StreamingDetectIntent since the streaming endpoint has
  // separate confidence estimates per portion of the audio in
  // StreamingRecognitionResult.
  float speech_recognition_confidence = 2;

  // The action name from the matched intent.
  string action = 3;

  // The collection of extracted parameters.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // -   MapKey type: string
  // -   MapKey value: parameter name
  // -   MapValue type:
  //     -   If parameter's entity type is a composite entity: map
  //     -   Else: depending on parameter value type, could be one of string,
  //         number, boolean, null, list or map
  // -   MapValue value:
  //     -   If parameter's entity type is a composite entity:
  //         map from composite entity property names to property values
  //     -   Else: parameter value
  google.protobuf.Struct parameters = 4;

  // This field is set to:
  //
  // - `false` if the matched intent has required parameters and not all of
  //    the required parameter values have been collected.
  // - `true` if all required parameter values have been collected, or if the
  //    matched intent doesn't contain any required parameters.
  bool all_required_params_present = 5;

  // Indicates whether the conversational query triggers a cancellation for slot
  // filling. For more information, see the [cancel slot filling
  // documentation](https://cloud.google.com/dialogflow/es/docs/intents-actions-parameters#cancel).
  bool cancels_slot_filling = 21;

  // The text to be pronounced to the user or shown on the screen.
  // Note: This is a legacy field; `fulfillment_messages` should be preferred.
  string fulfillment_text = 6;

  // The collection of rich messages to present to the user.
  repeated Intent.Message fulfillment_messages = 7;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `source` field returned in the webhook response.
  string webhook_source = 8;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `payload` field returned in the webhook response.
  google.protobuf.Struct webhook_payload = 9;

  // The collection of output contexts. If applicable,
  // `output_contexts.parameters` contains entries with name
  // `<parameter name>.original` containing the original parameter values
  // before the query.
  repeated Context output_contexts = 10;

  // The intent that matched the conversational query. Only some fields are
  // filled in this message, including but not limited to: `name`,
  // `display_name`, `end_interaction` and `is_fallback`.
  Intent intent = 11;

  // The intent detection confidence. Values range from 0.0
  // (completely uncertain) to 1.0 (completely certain).
  // This value is for informational purposes only and is only used to
  // help match the best intent within the classification threshold.
  // This value may change for the same end-user expression at any time due to a
  // model retraining or change in implementation.
  // If there are multiple `knowledge_answers` messages, this value is set to
  // the greatest `knowledgeAnswers.match_confidence` value in the list.
  float intent_detection_confidence = 12;

  // Free-form diagnostic information for the associated detect intent request.
  // The fields of this data can change without notice, so you should not write
  // code that depends on its structure.
  // The data may contain:
  //
  // - webhook call latency
  // - webhook errors
  google.protobuf.Struct diagnostic_info = 14;

  // The sentiment analysis result, which depends on the
  // `sentiment_analysis_request_config` specified in the request.
  SentimentAnalysisResult sentiment_analysis_result = 17;
}

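// Illustrative `parameters` value in JSON form for the message above. The
// parameter names, the composite entity, and its properties are hypothetical
// placeholders:
//
//   {
//     "size": "large",
//     "delivery-address": {
//       "street": "1600 Amphitheatre Pkwy",
//       "zip-code": "94043"
//     }
//   }
//
// Here "size" is a simple parameter, while "delivery-address" maps composite
// entity property names to property values, as described for MapValue above.
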
// The top-level message sent by the client to the
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2.Sessions.StreamingDetectIntent]
// method.
//
// Multiple request messages should be sent in order:
//
// 1.  The first message must contain
//     [session][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.session],
//     [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
//     plus optionally
//     [query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params].
//     If the client wants to receive an audio response, it should also contain
//     [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config].
//     The message must not contain
//     [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio].
// 2.  If
//     [query_input][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_input]
//     was set to
//     [query_input.audio_config][google.cloud.dialogflow.v2.InputAudioConfig],
//     all subsequent messages must contain
//     [input_audio][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.input_audio]
//     to continue with Speech recognition. If you decide to rather detect an
//     intent from text input after you already started Speech recognition,
//     please send a message with
//     [query_input.text][google.cloud.dialogflow.v2.QueryInput.text].
//
//     However, note that:
//
//     * Dialogflow will bill you for the audio duration so far.
//     * Dialogflow discards all Speech recognition results in favor of the
//       input text.
//     * Dialogflow will use the language code from the first message.
//
// After you have sent all input, you must half-close or abort the request
// stream.
message StreamingDetectIntentRequest {
  // Required. The name of the session the query is sent to.
  // Format of the session name:
  // `projects/<Project ID>/agent/sessions/<Session ID>`, or
  // `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  // ID>/sessions/<Session ID>`. If `Environment ID` is not specified, we assume
  // the default 'draft' environment. If `User ID` is not specified, we use
  // "-". It's up to the API caller to choose an appropriate `Session ID` and
  // `User ID`. They can be a random number or some type of user and session
  // identifiers (preferably hashed). The length of the `Session ID` and
  // `User ID` must not exceed 36 characters.
  //
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config which instructs the speech recognizer how to process
  //     the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Please use
  // [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2.InputAudioConfig.single_utterance]
  // instead. If `false` (default), recognition does not cease until the client
  // closes the stream. If `true`, the recognizer will detect a single spoken
  // utterance in input audio. Recognition ceases when it detects the audio's
  // voice has stopped or paused. In this case, once a detected intent is
  // received, the client should close the stream and start a new request with a
  // new stream as needed. This setting is ignored when `query_input` is a piece
  // of text or an event.
  bool single_utterance = 4 [deprecated = true];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and the agent-level speech synthesizer is
  // not configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 5;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at the agent level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The input audio content to be recognized. Must be sent if
  // `query_input` was set to a streaming input audio config. The complete audio
  // over all streaming messages must not exceed 1 minute.
  bytes input_audio = 6;

  // If true, `StreamingDetectIntentResponse.debugging_info` will get populated.
  bool enable_debugging_info = 8;
}

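// Illustrative request sequence for `StreamingDetectIntent`, sketched in
// textproto form (the session name, audio encoding, and chunking are
// hypothetical placeholders; the audio config fields are assumed to come from
// `InputAudioConfig` in audio_config.proto):
//
//   message 1: {
//     session: "projects/my-project/agent/sessions/123456789"
//     query_input {
//       audio_config {
//         audio_encoding: AUDIO_ENCODING_LINEAR_16
//         sample_rate_hertz: 16000
//         language_code: "en-US"
//       }
//     }
//   }
//   messages 2..N: { input_audio: <next chunk of audio bytes> }
//
// After the last chunk, half-close the stream and keep reading streamed
// responses until the server closes the connection.
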
// Cloud conversation info for easier debugging.
// It will get populated in `StreamingDetectIntentResponse` or
// `StreamingAnalyzeContentResponse` when the flag `enable_debugging_info` is
// set to true in corresponding requests.
message CloudConversationDebuggingInfo {
  // Number of input audio data chunks in streaming requests.
  int32 audio_data_chunks = 1;

  // Time offset of the end of speech utterance relative to the
  // beginning of the first audio chunk.
  google.protobuf.Duration result_end_time_offset = 2;

  // Duration of first audio chunk.
  google.protobuf.Duration first_audio_duration = 3;

  // Whether client used single utterance mode.
  bool single_utterance = 5;

  // Time offsets of the speech partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration speech_partial_results_end_times = 6;

  // Time offsets of the speech final results (is_final=true) relative to the
  // beginning of the stream.
  repeated google.protobuf.Duration speech_final_results_end_times = 7;

  // Total number of partial responses.
  int32 partial_responses = 8;

  // Time offset of Speaker ID stream close time relative to the Speech stream
  // close time in milliseconds. Only meaningful for conversations involving
  // passive verification.
  int32 speaker_id_passive_latency_ms_offset = 9;

  // Whether a barge-in event is triggered in this request.
  bool bargein_event_triggered = 10;

  // Whether speech uses single utterance mode.
  bool speech_single_utterance = 11;

  // Time offsets of the DTMF partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_partial_results_times = 12;

  // Time offsets of the DTMF final results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_final_results_times = 13;

  // Time offset of the end-of-single-utterance signal relative to the
  // beginning of the stream.
  google.protobuf.Duration single_utterance_end_time_offset = 14;

  // No speech timeout settings observed at runtime.
  google.protobuf.Duration no_speech_timeout = 15;

  // Whether the streaming terminates with an injected text query.
  bool is_input_text = 16;

  // Client half close time in terms of input audio duration.
  google.protobuf.Duration client_half_close_time_offset = 17;

  // Client half close time in terms of API streaming duration.
  google.protobuf.Duration client_half_close_streaming_time_offset = 18;
}

// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1.  If the `StreamingDetectIntentRequest.input_audio` field was
//     set, the `recognition_result` field is populated for one
//     or more messages.
//     See the
//     [StreamingRecognitionResult][google.cloud.dialogflow.v2.StreamingRecognitionResult]
//     message for details about the result message sequence.
//
// 2.  The next message contains `response_id`, `query_result`
//     and optionally `webhook_status` if a WebHook was called.
message StreamingDetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The result of speech recognition.
  StreamingRecognitionResult recognition_result = 2;

  // The result of the conversational query or event processing.
  QueryResult query_result = 3;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 4;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 5;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;

  // Debugging info that would get populated when
  // `StreamingDetectIntentRequest.enable_debugging_info` is set to true.
  CloudConversationDebuggingInfo debugging_info = 8;
}

// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// While end-user audio is being processed, Dialogflow sends a series of
// results. Each result may contain a `transcript` value. A transcript
// represents a portion of the utterance. While the recognizer is processing
// audio, transcript values may be interim values or finalized values.
// Once a transcript is finalized, the `is_final` value is set to true and
// processing continues for the next transcript.
//
// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
// was true, and the recognizer has completed processing audio,
// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
// following (last) result contains the last finalized transcript.
//
// The complete end-user utterance is determined by concatenating the
// finalized transcript values received for the series of results.
//
// In the following example, single utterance is enabled. In the case where
// single utterance is not enabled, result 7 would not occur.
//
// ```
// Num | transcript              | message_type            | is_final
// --- | ----------------------- | ----------------------- | --------
// 1   | "tube"                  | TRANSCRIPT              | false
// 2   | "to be a"               | TRANSCRIPT              | false
// 3   | "to be"                 | TRANSCRIPT              | false
// 4   | "to be or not to be"    | TRANSCRIPT              | true
// 5   | "that's"                | TRANSCRIPT              | false
// 6   | "that is"               | TRANSCRIPT              | false
// 7   | unset                   | END_OF_SINGLE_UTTERANCE | unset
// 8   | " that is the question" | TRANSCRIPT              | true
// ```
//
// Concatenating the finalized transcripts with `is_final` set to true,
// the complete utterance becomes "to be or not to be that is the question".
message StreamingRecognitionResult {
  // Type of the response message.
  enum MessageType {
    // Not specified. Should never be used.
    MESSAGE_TYPE_UNSPECIFIED = 0;

    // Message contains a (possibly partial) transcript.
    TRANSCRIPT = 1;

    // Event indicates that the server has detected the end of the user's speech
    // utterance and expects no additional inputs.
    // Therefore, the server will not process additional audio (although it may
    // subsequently return additional results). The client should stop sending
    // additional audio data, half-close the gRPC connection, and wait for any
    // additional results until the server closes the gRPC connection. This
    // message is only sent if `single_utterance` was set to `true`, and is not
    // used otherwise.
    END_OF_SINGLE_UTTERANCE = 2;
  }

  // Type of the result message.
  MessageType message_type = 1;

  // Transcript text representing the words that the user spoke.
  // Populated if and only if `message_type` = `TRANSCRIPT`.
  string transcript = 2;

  // If `false`, the `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, the recognizer will not return
  // any further hypotheses about this piece of the audio. May only be populated
  // for `message_type` = `TRANSCRIPT`.
  bool is_final = 3;

  // The Speech confidence between 0.0 and 1.0 for the current portion of audio.
  // A higher number indicates an estimated greater likelihood that the
  // recognized words are correct. The default of 0.0 is a sentinel value
  // indicating that confidence was not set.
  //
  // This field is typically only provided if `is_final` is true and you should
  // not rely on it being accurate or even set.
  float confidence = 4;

  // Word-specific information for the words recognized by Speech in
  // [transcript][google.cloud.dialogflow.v2.StreamingRecognitionResult.transcript].
  // Populated if and only if `message_type` = `TRANSCRIPT` and
  // [InputAudioConfig.enable_word_info] is set.
  repeated SpeechWordInfo speech_word_info = 7;

  // Time offset of the end of this Speech recognition result relative to the
  // beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`.
  google.protobuf.Duration speech_end_offset = 8;

  // Detected language code for the transcript.
  string language_code = 10;
}

// ============================================================================
// Auxiliary proto messages.
//
// Represents the natural language text to be processed.
message TextInput {
  // Required. The UTF-8 encoded natural language text to be processed.
  // Text length must not exceed 256 characters for virtual agent interactions.
  string text = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 2 [(google.api.field_behavior) = REQUIRED];
}

// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
  // Required. The unique identifier of the event.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // The collection of parameters associated with the event.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // -   MapKey type: string
  // -   MapKey value: parameter name
  // -   MapValue type:
  //     -   If parameter's entity type is a composite entity: map
  //     -   Else: depending on parameter value type, could be one of string,
  //         number, boolean, null, list or map
  // -   MapValue value:
  //     -   If parameter's entity type is a composite entity:
  //         map from composite entity property names to property values
  //     -   Else: parameter value
  google.protobuf.Struct parameters = 2;

  // Required. The language of this query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  //
  // This field is ignored when used in the context of a
  // [WebhookResponse.followup_event_input][google.cloud.dialogflow.v2.WebhookResponse.followup_event_input]
  // field, because the language was already defined in the originating detect
  // intent request.
  string language_code = 3 [(google.api.field_behavior) = REQUIRED];
}

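// Illustrative `EventInput` value in proto3 JSON form, matching the
// `welcome_event` example in the comment above (the parameter value "Sam" and
// the language code are placeholders):
//
//   {
//     "name": "welcome_event",
//     "parameters": {"name": "Sam"},
//     "languageCode": "en-US"
//   }
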
// Configures the types of sentiment analysis to perform.
message SentimentAnalysisRequestConfig {
  // Instructs the service to perform sentiment analysis on
  // `query_text`. If not provided, sentiment analysis is not performed on
  // `query_text`.
  bool analyze_query_text_sentiment = 1;
}

// The result of sentiment analysis. Sentiment analysis inspects user input
// and identifies the prevailing subjective opinion, especially to determine a
// user's attitude as positive, negative, or neutral.
// For [Participants.DetectIntent][], it needs to be configured in
// [DetectIntentRequest.query_params][google.cloud.dialogflow.v2.DetectIntentRequest.query_params].
// For [Participants.StreamingDetectIntent][], it needs to be configured in
// [StreamingDetectIntentRequest.query_params][google.cloud.dialogflow.v2.StreamingDetectIntentRequest.query_params].
// And for
// [Participants.AnalyzeContent][google.cloud.dialogflow.v2.Participants.AnalyzeContent]
// and
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2.Participants.StreamingAnalyzeContent],
// it needs to be configured in
// [ConversationProfile.human_agent_assistant_config][google.cloud.dialogflow.v2.ConversationProfile.human_agent_assistant_config].
message SentimentAnalysisResult {
  // The sentiment analysis result for `query_text`.
  Sentiment query_text_sentiment = 1;
}

// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text. See:
// https://cloud.google.com/natural-language/docs/basics#interpreting_sentiment_analysis_values
// for how to interpret the result.
message Sentiment {
  // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
  // sentiment).
  float score = 1;

  // A non-negative number in the [0, +inf) range, which represents the absolute
  // magnitude of sentiment, regardless of score (positive or negative).
  float magnitude = 2;
}