// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dialogflow.v2beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dialogflow/v2beta1/agent.proto";
import "google/cloud/dialogflow/v2beta1/audio_config.proto";
import "google/cloud/dialogflow/v2beta1/context.proto";
import "google/cloud/dialogflow/v2beta1/intent.proto";
import "google/cloud/dialogflow/v2beta1/session_entity_type.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2Beta1";
option go_package = "cloud.google.com/go/dialogflow/apiv2beta1/dialogflowpb;dialogflowpb";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2beta1";
option objc_class_prefix = "DF";
option (google.api.resource_definition) = {
  type: "dialogflow.googleapis.com/Session"
  pattern: "projects/{project}/agent/sessions/{session}"
  pattern: "projects/{project}/agent/environments/{environment}/users/{user}/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/environments/{environment}/users/{user}/sessions/{session}"
};

// A service used for session interactions.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
service Sessions {
  option (google.api.default_host) = "dialogflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/dialogflow";

  // Processes a natural language query and returns structured, actionable data
  // as a result. This method is not idempotent, because it may cause contexts
  // and session entity types to be updated, which in turn might affect
  // results of future queries.
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [AnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent]
  // instead of `DetectIntent`. `AnalyzeContent` has additional
  // functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
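  //
  // For illustration, a minimal synchronous call sketch using the Python
  // client library (`google-cloud-dialogflow`); the project ID, session ID,
  // and query text below are placeholders:
  //
  // ```python
  // from google.cloud import dialogflow_v2beta1 as dialogflow
  //
  // client = dialogflow.SessionsClient()
  // # Expands to "projects/my-project/agent/sessions/my-session".
  // session = client.session_path("my-project", "my-session")
  // query_input = dialogflow.QueryInput(
  //     text=dialogflow.TextInput(text="book a room", language_code="en-US")
  // )
  // response = client.detect_intent(session=session, query_input=query_input)
  // print(response.query_result.fulfillment_text)
  // ```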
  rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/{session=projects/*/agent/sessions/*}:detectIntent"
      body: "*"
      additional_bindings {
        post: "/v2beta1/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2beta1/{session=projects/*/locations/*/agent/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2beta1/{session=projects/*/locations/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "session,query_input";
  }

  // Processes a natural language query in audio format in a streaming fashion
  // and returns structured, actionable data as a result. This method is only
  // available via the gRPC API (not REST).
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent]
  // instead of `StreamingDetectIntent`. `StreamingAnalyzeContent` has
  // additional functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
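  //
  // As a sketch (Python client library, given a `SessionsClient` as in the
  // `DetectIntent` example above; `request_generator`, `session`, and
  // `audio_chunks` refer to the sketch on `StreamingDetectIntentRequest`
  // below), responses can be consumed as they stream in:
  //
  // ```python
  // responses = client.streaming_detect_intent(
  //     requests=request_generator(session, audio_chunks)
  // )
  // for response in responses:
  //     if response.recognition_result.transcript:
  //         print("interim:", response.recognition_result.transcript)
  //     if response.query_result.intent.display_name:
  //         print("matched:", response.query_result.intent.display_name)
  // ```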
  rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
      returns (stream StreamingDetectIntentResponse) {}
}

// The request to detect a user's intent.
message DetectIntentRequest {
  // Required. The name of the session this query is sent to. Supported formats:
  // - `projects/<Project ID>/agent/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location ID>/agent/sessions/<Session
  //   ID>`,
  // - `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  //   ID>/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location
  //   ID>/agent/environments/<Environment ID>/users/<User ID>/sessions/<Session
  //   ID>`.
  //
  // If `Location ID` is not specified, we assume the default 'us' location. If
  // `Environment ID` is not specified, we assume the default 'draft'
  // environment (`Environment ID` might be referred to as the environment name
  // in some places). If `User ID` is not specified, we use "-". It's up to the
  // API caller to choose an appropriate `Session ID` and `User ID`. They can be
  // a random number or some type of user and session identifiers (preferably
  // hashed). The length of the `Session ID` and `User ID` must not exceed 36
  // characters. For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config
  //     which instructs the speech recognizer how to process the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and the agent-level speech synthesizer is
  // not configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 4;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2beta1.DetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at the agent level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2beta1.DetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
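  //
  // For example, to override only the speaking rate while keeping the other
  // agent-level synthesizer settings, a mask like the following could be used
  // (a sketch; the path assumes the
  // `OutputAudioConfig.synthesize_speech_config.speaking_rate` field):
  //
  // ```python
  // from google.protobuf import field_mask_pb2
  //
  // mask = field_mask_pb2.FieldMask(
  //     paths=["synthesize_speech_config.speaking_rate"]
  // )
  // ```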
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The natural language speech audio to be processed. This field
  // should be populated if and only if `query_input` is set to an input audio
  // config. A single request can contain up to 1 minute of speech audio data.
  bytes input_audio = 5;
}

// The message returned from the DetectIntent method.
message DetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 2;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in `query_result`. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 5;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 3;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 4;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Represents the parameters of the conversational query.
message QueryParameters {
  // The time zone of this conversational query from the
  // [time zone database](https://www.iana.org/time-zones), e.g.,
  // America/New_York, Europe/Paris. If not provided, the time zone specified in
  // the agent settings is used.
  string time_zone = 1;

  // The geo location of this conversational query.
  google.type.LatLng geo_location = 2;

  // The collection of contexts to be activated before this query is
  // executed.
  repeated Context contexts = 3;

  // Specifies whether to delete all contexts in the current session
  // before the new ones are activated.
  bool reset_contexts = 4;

  // Additional session entity types to replace or extend developer
  // entity types with. The entity synonyms apply to all languages and persist
  // for the session of this query.
  repeated SessionEntityType session_entity_types = 5;

  // This field can be used to pass custom data to your webhook.
  // Arbitrary JSON objects are supported.
  // If supplied, the value is used to populate the
  // `WebhookRequest.original_detect_intent_request.payload`
  // field sent to your webhook.
  google.protobuf.Struct payload = 6;

  // KnowledgeBases to get alternative results from. If not set, the
  // KnowledgeBases enabled in the agent (through the UI) will be used.
  // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base ID>`.
  repeated string knowledge_base_names = 12;

  // Configures the type of sentiment analysis to perform. If not
  // provided, sentiment analysis is not performed.
  // Note: Sentiment analysis is currently only available for Essentials
  // Edition agents.
  SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;

  // For a mega agent query, directly specify which sub agents to query.
  // If any specified sub agent is not linked to the mega agent, an error will
  // be returned. If empty, Dialogflow will decide which sub agents to query.
  // If specified for a non-mega-agent query, it will be silently ignored.
  repeated SubAgent sub_agents = 13;

  // This field can be used to pass HTTP headers for a webhook
  // call. These headers will be sent to the webhook along with the headers
  // that have been configured through the Dialogflow web console. The headers
  // defined within this field will overwrite the headers configured through
  // the Dialogflow console if there is a conflict. Header names are
  // case-insensitive. Google's specified headers are not allowed, including:
  // "Host", "Content-Length", "Connection", "From", "User-Agent",
  // "Accept-Encoding", "If-Modified-Since", "If-None-Match",
  // "X-Forwarded-For", etc.
  map<string, string> webhook_headers = 14;
}

// Represents the query input. It can contain one of:
//
// 1.  An audio config which
//     instructs the speech recognizer how to process the speech audio.
//
// 2.  A conversational query in the form of text.
//
// 3.  An event that specifies which intent to trigger.
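//
// For illustration, each variant maps to one field of the `input` oneof in
// the Python client library (`dialogflow` is imported as in the
// `DetectIntent` example above; all values below are placeholders):
//
// ```python
// text_query = dialogflow.QueryInput(
//     text=dialogflow.TextInput(text="hi", language_code="en-US")
// )
// event_query = dialogflow.QueryInput(
//     event=dialogflow.EventInput(name="welcome_event", language_code="en-US")
// )
// audio_query = dialogflow.QueryInput(
//     audio_config=dialogflow.InputAudioConfig(
//         audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//         sample_rate_hertz=16000,
//         language_code="en-US",
//     )
// )
// ```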
message QueryInput {
  // Required. The input specification.
  oneof input {
    // Instructs the speech recognizer how to process the speech audio.
    InputAudioConfig audio_config = 1;

    // The natural language text to be processed.
    TextInput text = 2;

    // The event to be processed.
    EventInput event = 3;

    // The DTMF digits used to invoke an intent and fill in parameter values.
    TelephonyDtmfEvents dtmf = 4;
  }
}

// Represents the result of conversational query or event processing.
message QueryResult {
  // The original conversational query text:
  //
  // - If natural language text was provided as input, `query_text` contains
  //   a copy of the input.
  // - If natural language speech audio was provided as input, `query_text`
  //   contains the speech recognition result. If the speech recognizer produced
  //   multiple alternatives, a particular one is picked.
  // - If automatic spell correction is enabled, `query_text` will contain the
  //   corrected user input.
  string query_text = 1;

  // The language that was triggered during intent detection.
  // See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes.
  string language_code = 15;

  // The Speech recognition confidence between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. The default of 0.0 is a sentinel value indicating that confidence
  // was not set.
  //
  // This field is not guaranteed to be accurate or set. In particular, this
  // field isn't set for StreamingDetectIntent since the streaming endpoint has
  // separate confidence estimates per portion of the audio in
  // StreamingRecognitionResult.
  float speech_recognition_confidence = 2;

  // The action name from the matched intent.
  string action = 3;

  // The collection of extracted parameters.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // -   MapKey type: string
  // -   MapKey value: parameter name
  // -   MapValue type:
  //     -   If parameter's entity type is a composite entity: map
  //     -   Else: depending on parameter value type, could be one of string,
  //         number, boolean, null, list or map
  // -   MapValue value:
  //     -   If parameter's entity type is a composite entity:
  //         map from composite entity property names to property values
  //     -   Else: parameter value
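  //
  // As an illustration, a hypothetical pizza-ordering parameter map could
  // look like the following (shown as the equivalent Python dict; the
  // parameter and property names are made up):
  //
  // ```python
  // parameters = {
  //     "size": "large",                     # simple entity type
  //     "toppings": ["mushrooms", "olives"], # list value
  //     "address": {                         # composite entity type
  //         "street": "1600 Amphitheatre Pkwy",
  //         "city": "Mountain View",
  //     },
  // }
  // ```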
  google.protobuf.Struct parameters = 4;

  // This field is set to:
  //
  // - `false` if the matched intent has required parameters and not all of
  //    the required parameter values have been collected.
  // - `true` if all required parameter values have been collected, or if the
  //    matched intent doesn't contain any required parameters.
  bool all_required_params_present = 5;

  // Indicates whether the conversational query triggers a cancellation for slot
  // filling. For more information, see the [cancel slot filling
  // documentation](https://cloud.google.com/dialogflow/es/docs/intents-actions-parameters#cancel).
  bool cancels_slot_filling = 21;

  // The text to be pronounced to the user or shown on the screen.
  // Note: This is a legacy field; `fulfillment_messages` should be preferred.
  string fulfillment_text = 6;

  // The collection of rich messages to present to the user.
  repeated Intent.Message fulfillment_messages = 7;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `source` field returned in the webhook response.
  string webhook_source = 8;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `payload` field returned in the webhook response.
  google.protobuf.Struct webhook_payload = 9;

  // The collection of output contexts. If applicable,
  // `output_contexts.parameters` contains entries with name
  // `<parameter name>.original` containing the original parameter values
  // before the query.
  repeated Context output_contexts = 10;

  // The intent that matched the conversational query. Some, but not
  // all, fields are filled in this message, including but not limited to:
  // `name`, `display_name`, `end_interaction` and `is_fallback`.
  Intent intent = 11;

  // The intent detection confidence. Values range from 0.0
  // (completely uncertain) to 1.0 (completely certain).
  // This value is for informational purposes only and is only used to
  // help match the best intent within the classification threshold.
  // This value may change for the same end-user expression at any time due to a
  // model retraining or change in implementation.
  // If there are multiple `knowledge_answers` messages, this value is set to
  // the greatest `knowledgeAnswers.match_confidence` value in the list.
  float intent_detection_confidence = 12;

  // Free-form diagnostic information for the associated detect intent request.
  // The fields of this data can change without notice, so you should not write
  // code that depends on its structure.
  // The data may contain:
  //
  // - webhook call latency
  // - webhook errors
  google.protobuf.Struct diagnostic_info = 14;

  // The sentiment analysis result, which depends on the
  // `sentiment_analysis_request_config` specified in the request.
  SentimentAnalysisResult sentiment_analysis_result = 17;

  // The result from Knowledge Connector (if any), ordered by decreasing
  // `KnowledgeAnswers.match_confidence`.
  KnowledgeAnswers knowledge_answers = 18;
}

// Represents the result of querying a Knowledge base.
message KnowledgeAnswers {
  // An answer from Knowledge Connector.
  message Answer {
    // Represents the system's confidence that this knowledge answer is a good
    // match for this conversational query.
    enum MatchConfidenceLevel {
      // Not specified.
      MATCH_CONFIDENCE_LEVEL_UNSPECIFIED = 0;

      // Indicates that the confidence is low.
      LOW = 1;

      // Indicates that the confidence is medium.
      MEDIUM = 2;

      // Indicates that the confidence is high.
      HIGH = 3;
    }

    // Indicates which Knowledge Document this answer was extracted from.
    // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base
    // ID>/documents/<Document ID>`.
    string source = 1 [(google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Document"
    }];

    // The corresponding FAQ question if the answer was extracted from a FAQ
    // Document, empty otherwise.
    string faq_question = 2;

    // The piece of text from the `source` knowledge base document that answers
    // this conversational query.
    string answer = 3;

    // The system's confidence level that this knowledge answer is a good match
    // for this conversational query.
    // NOTE: The confidence level for a given `<query, answer>` pair may change
    // without notice, as it depends on models that are constantly being
    // improved. However, it will change less frequently than the confidence
    // score below, and should be preferred for referencing the quality of an
    // answer.
    MatchConfidenceLevel match_confidence_level = 4;

    // The system's confidence score that this Knowledge answer is a good match
    // for this conversational query.
    // The range is from 0.0 (completely uncertain) to 1.0 (completely certain).
    // Note: The confidence score is likely to vary somewhat (possibly even for
    // identical requests), as the underlying model is under constant
    // improvement. It may be deprecated in the future. We recommend using
    // `match_confidence_level`, which should generally be more stable.
    float match_confidence = 5;
  }

  // A list of answers from Knowledge Connector.
  repeated Answer answers = 1;
}

// The top-level message sent by the client to the
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2beta1.Sessions.StreamingDetectIntent]
// method.
//
// Multiple request messages should be sent in order:
//
// 1.  The first message must contain
//     [session][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.session]
//     and
//     [query_input][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_input],
//     plus optionally
//     [query_params][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_params].
//     If the client wants to receive an audio response, it should also contain
//     [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config].
//     The message must not contain
//     [input_audio][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.input_audio].
// 2.  If
//     [query_input][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_input]
//     was set to
//     [query_input.audio_config][google.cloud.dialogflow.v2beta1.InputAudioConfig],
//     all subsequent messages must contain
//     [input_audio][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.input_audio]
//     to continue with Speech recognition. If you decide to detect an intent
//     from text input after you have already started Speech recognition,
//     send a message with
//     [query_input.text][google.cloud.dialogflow.v2beta1.QueryInput.text].
//
//     However, note that:
//
//     * Dialogflow will bill you for the audio duration so far.
//     * Dialogflow discards all Speech recognition results in favor of the
//       input text.
//     * Dialogflow will use the language code from the first message.
//
// After you have sent all input, you must half-close or abort the request
// stream.
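//
// As a sketch using the Python client library (`dialogflow` imported as in
// the `DetectIntent` example above; the audio parameters and the chunk
// source are placeholders):
//
// ```python
// def request_generator(session, audio_chunks):
//     # 1. First message: session and query_input, but no input_audio.
//     yield dialogflow.StreamingDetectIntentRequest(
//         session=session,
//         query_input=dialogflow.QueryInput(
//             audio_config=dialogflow.InputAudioConfig(
//                 audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//                 sample_rate_hertz=16000,
//                 language_code="en-US",
//             )
//         ),
//     )
//     # 2. Subsequent messages: audio content only.
//     for chunk in audio_chunks:
//         yield dialogflow.StreamingDetectIntentRequest(input_audio=chunk)
// ```
//
// When the request iterator is exhausted, the gRPC client half-closes the
// stream on your behalf.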
message StreamingDetectIntentRequest {
  // Required. The name of the session the query is sent to.
  // Supported formats:
  // - `projects/<Project ID>/agent/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location ID>/agent/sessions/<Session
  //   ID>`,
  // - `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  //   ID>/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location
  //   ID>/agent/environments/<Environment ID>/users/<User ID>/sessions/<Session
  //   ID>`.
  //
  // If `Location ID` is not specified, we assume the default 'us' location. If
  // `Environment ID` is not specified, we assume the default 'draft'
  // environment. If `User ID` is not specified, we use "-". It's up to the API
  // caller to choose an appropriate `Session ID` and `User ID`. They can be a
  // random number or some type of user and session identifiers (preferably
  // hashed). The length of the `Session ID` and `User ID` must not exceed 36
  // characters.
  //
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1.  an audio config which instructs the speech recognizer how to process
  //     the speech audio,
  //
  // 2.  a conversational query in the form of text, or
  //
  // 3.  an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // DEPRECATED. Please use
  // [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2beta1.InputAudioConfig.single_utterance]
  // instead. If `false` (default), recognition does not cease until the client
  // closes the stream. If `true`, the recognizer will detect a single spoken
  // utterance in input audio. Recognition ceases when it detects the audio's
  // voice has stopped or paused. In this case, once a detected intent is
  // received, the client should close the stream and start a new request with a
  // new stream as needed. This setting is ignored when `query_input` is a piece
  // of text or an event.
  bool single_utterance = 4 [deprecated = true];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and the agent-level speech synthesizer is
  // not configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 5;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at the agent level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The input audio content to be recognized. Must be sent if
  // `query_input` was set to a streaming input audio config. The complete audio
  // over all streaming messages must not exceed 1 minute.
  bytes input_audio = 6;

  // If true, `StreamingDetectIntentResponse.debugging_info` will get populated.
  bool enable_debugging_info = 8;
}

// Cloud conversation info for easier debugging.
// It will get populated in `StreamingDetectIntentResponse` or
// `StreamingAnalyzeContentResponse` when the flag `enable_debugging_info` is
// set to true in the corresponding request.
message CloudConversationDebuggingInfo {
  // Number of input audio data chunks in streaming requests.
  int32 audio_data_chunks = 1;

  // Time offset of the end of the speech utterance relative to the
  // beginning of the first audio chunk.
  google.protobuf.Duration result_end_time_offset = 2;

  // Duration of the first audio chunk.
  google.protobuf.Duration first_audio_duration = 3;

  // Whether the client used single utterance mode.
  bool single_utterance = 5;

  // Time offsets of the speech partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration speech_partial_results_end_times = 6;

  // Time offsets of the speech final results (is_final=true) relative to the
  // beginning of the stream.
  repeated google.protobuf.Duration speech_final_results_end_times = 7;

  // Total number of partial responses.
  int32 partial_responses = 8;

  // Time offset of the Speaker ID stream close time relative to the Speech
  // stream close time in milliseconds. Only meaningful for conversations
  // involving passive verification.
  int32 speaker_id_passive_latency_ms_offset = 9;

  // Whether a barge-in event was triggered in this request.
  bool bargein_event_triggered = 10;

  // Whether speech uses single utterance mode.
  bool speech_single_utterance = 11;

  // Time offsets of the DTMF partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_partial_results_times = 12;

  // Time offsets of the DTMF final results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_final_results_times = 13;

  // Time offset of the end-of-single-utterance signal relative to the
  // beginning of the stream.
  google.protobuf.Duration single_utterance_end_time_offset = 14;

  // The no-speech timeout settings observed at runtime.
  google.protobuf.Duration no_speech_timeout = 15;

  // Whether the streaming terminates with an injected text query.
  bool is_input_text = 16;

  // Client half-close time in terms of input audio duration.
  google.protobuf.Duration client_half_close_time_offset = 17;

  // Client half-close time in terms of API streaming duration.
  google.protobuf.Duration client_half_close_streaming_time_offset = 18;
}

// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1.  If the `StreamingDetectIntentRequest.input_audio` field was
//     set, the `recognition_result` field is populated for one
//     or more messages.
//     See the
//     [StreamingRecognitionResult][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult]
//     message for details about the result message sequence.
//
// 2.  The next message contains `response_id`, `query_result`,
//     `alternative_query_results` and optionally `webhook_status` if a webhook
//     was called.
//
// 3.  If `output_audio_config` was specified in the request or an agent-level
//     speech synthesizer is configured, all subsequent messages contain
//     `output_audio` and `output_audio_config`.
message StreamingDetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The result of speech recognition.
  StreamingRecognitionResult recognition_result = 2;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 3;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in `query_result`. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 7;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 4;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 5;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;

  // Debugging info populated when
  // `StreamingDetectIntentRequest.enable_debugging_info` is set to true.
  CloudConversationDebuggingInfo debugging_info = 8;
}

// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// While end-user audio is being processed, Dialogflow sends a series of
// results. Each result may contain a `transcript` value. A transcript
// represents a portion of the utterance. While the recognizer is processing
// audio, transcript values may be interim values or finalized values.
// Once a transcript is finalized, the `is_final` value is set to true and
// processing continues for the next transcript.
//
// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
// was true, and the recognizer has completed processing audio,
// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
// following (last) result contains the last finalized transcript.
//
// The complete end-user utterance is determined by concatenating the
// finalized transcript values received for the series of results.
//
// In the following example, single utterance is enabled. In the case where
// single utterance is not enabled, result 7 would not occur.
//
// ```
// Num | transcript              | message_type            | is_final
// --- | ----------------------- | ----------------------- | --------
// 1   | "tube"                  | TRANSCRIPT              | false
// 2   | "to be a"               | TRANSCRIPT              | false
// 3   | "to be"                 | TRANSCRIPT              | false
// 4   | "to be or not to be"    | TRANSCRIPT              | true
// 5   | "that's"                | TRANSCRIPT              | false
// 6   | "that is"               | TRANSCRIPT              | false
// 7   | unset                   | END_OF_SINGLE_UTTERANCE | unset
// 8   | " that is the question" | TRANSCRIPT              | true
// ```
//
// Concatenating the finalized transcripts with `is_final` set to true,
// the complete utterance becomes "to be or not to be that is the question".
message StreamingRecognitionResult {
  // Type of the response message.
  enum MessageType {
    // Not specified. Should never be used.
    MESSAGE_TYPE_UNSPECIFIED = 0;

    // Message contains a (possibly partial) transcript.
    TRANSCRIPT = 1;

    // Message contains DTMF digits.
    DTMF_DIGITS = 3;

    // Event indicates that the server has detected the end of the user's speech
    // utterance and expects no additional speech. Therefore, the server will
    // not process additional audio (although it may subsequently return
    // additional results). The client should stop sending additional audio
    // data, half-close the gRPC connection, and wait for any additional results
    // until the server closes the gRPC connection. This message is only sent if
    // `single_utterance` was set to `true`, and is not used otherwise.
    END_OF_SINGLE_UTTERANCE = 2;

    // Message contains DTMF digits. Before a message with DTMF_DIGITS is sent,
    // a message with PARTIAL_DTMF_DIGITS may be sent with DTMF digits collected
    // up to the time of sending, which represents an intermediate result.
    PARTIAL_DTMF_DIGITS = 4;
  }

  // Type of the result message.
  MessageType message_type = 1;

  // Transcript text representing the words that the user spoke.
  // Populated if and only if `message_type` = `TRANSCRIPT`.
  string transcript = 2;

  // If `false`, the `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, the recognizer will not return
  // any further hypotheses about this piece of the audio. May only be populated
  // for `message_type` = `TRANSCRIPT`.
  bool is_final = 3;

  // The Speech confidence between 0.0 and 1.0 for the current portion of audio.
  // A higher number indicates an estimated greater likelihood that the
  // recognized words are correct. The default of 0.0 is a sentinel value
  // indicating that confidence was not set.
  //
  // This field is typically only provided if `is_final` is true, and you
  // should not rely on it being accurate or even set.
  float confidence = 4;

  // An estimate of the likelihood that the speech recognizer will
  // not change its guess about this interim recognition result:
  //
  // * If the value is unspecified or 0.0, Dialogflow didn't compute the
  //   stability. In particular, Dialogflow will only provide stability for
  //   `TRANSCRIPT` results with `is_final = false`.
  // * Otherwise, the value is in (0.0, 1.0] where 0.0 means completely
  //   unstable and 1.0 means completely stable.
  float stability = 6;

  // Word-specific information for the words recognized by Speech in
  // [transcript][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult.transcript].
  // Populated if and only if `message_type` = `TRANSCRIPT` and
  // [InputAudioConfig.enable_word_info] is set.
  repeated SpeechWordInfo speech_word_info = 7;

  // Time offset of the end of this Speech recognition result relative to the
  // beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`.
  google.protobuf.Duration speech_end_offset = 8;

  // Detected language code for the transcript.
  string language_code = 10;

  // DTMF digits. Populated if and only if `message_type` = `DTMF_DIGITS`.
  TelephonyDtmfEvents dtmf_digits = 5;
}

// Represents the natural language text to be processed.
message TextInput {
  // Required. The UTF-8 encoded natural language text to be processed.
  // Text length must not exceed 256 characters for virtual agent interactions.
  string text = 1;

  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 2;
}

// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
  // Required. The unique identifier of the event.
  string name = 1;

  // The collection of parameters associated with the event.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // -   MapKey type: string
  // -   MapKey value: parameter name
  // -   MapValue type:
  //     -   If parameter's entity type is a composite entity: map
  //     -   Else: depending on parameter value type, could be one of string,
  //         number, boolean, null, list or map
  // -   MapValue value:
  //     -   If parameter's entity type is a composite entity:
  //         map from composite entity property names to property values
  //     -   Else: parameter value
  google.protobuf.Struct parameters = 2;

  // Required. The language of this query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  //
  // This field is ignored when used in the context of a
  // [WebhookResponse.followup_event_input][google.cloud.dialogflow.v2beta1.WebhookResponse.followup_event_input]
  // field, because the language was already defined in the originating detect
  // intent request.
  string language_code = 3;
}

// Configures the types of sentiment analysis to perform.
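//
// For example, to request sentiment analysis on the query text (a sketch
// using the Python client library):
//
// ```python
// query_params = dialogflow.QueryParameters(
//     sentiment_analysis_request_config=dialogflow.SentimentAnalysisRequestConfig(
//         analyze_query_text_sentiment=True
//     )
// )
// ```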
message SentimentAnalysisRequestConfig {
  // Instructs the service to perform sentiment analysis on
  // `query_text`. If not provided, sentiment analysis is not performed on
  // `query_text`.
  bool analyze_query_text_sentiment = 1;
}

// The result of sentiment analysis. Sentiment analysis inspects user input
// and identifies the prevailing subjective opinion, especially to determine a
// user's attitude as positive, negative, or neutral.
// For [Participants.DetectIntent][], it needs to be configured in
// [DetectIntentRequest.query_params][google.cloud.dialogflow.v2beta1.DetectIntentRequest.query_params].
// For [Participants.StreamingDetectIntent][], it needs to be configured in
// [StreamingDetectIntentRequest.query_params][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_params].
// And for
// [Participants.AnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent]
// and
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent],
// it needs to be configured in
// [ConversationProfile.human_agent_assistant_config][google.cloud.dialogflow.v2beta1.ConversationProfile.human_agent_assistant_config].
message SentimentAnalysisResult {
  // The sentiment analysis result for `query_text`.
  Sentiment query_text_sentiment = 1;
}

// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text. See:
// https://cloud.google.com/natural-language/docs/basics#interpreting_sentiment_analysis_values
// for how to interpret the result.
message Sentiment {
  // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
  // sentiment).
  float score = 1;

  // A non-negative number in the [0, +inf) range, which represents the absolute
  // magnitude of sentiment, regardless of score (positive or negative).
  float magnitude = 2;
}