// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dialogflow.v2beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dialogflow/v2beta1/agent.proto";
import "google/cloud/dialogflow/v2beta1/audio_config.proto";
import "google/cloud/dialogflow/v2beta1/context.proto";
import "google/cloud/dialogflow/v2beta1/intent.proto";
import "google/cloud/dialogflow/v2beta1/session_entity_type.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2Beta1";
option go_package = "cloud.google.com/go/dialogflow/apiv2beta1/dialogflowpb;dialogflowpb";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2beta1";
option objc_class_prefix = "DF";
option (google.api.resource_definition) = {
  type: "dialogflow.googleapis.com/Session"
  pattern: "projects/{project}/agent/sessions/{session}"
  pattern: "projects/{project}/agent/environments/{environment}/users/{user}/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/environments/{environment}/users/{user}/sessions/{session}"
};

// A service used for session interactions.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
service Sessions {
  option (google.api.default_host) = "dialogflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/dialogflow";

  // Processes a natural language query and returns structured, actionable data
  // as a result. This method is not idempotent, because it may cause contexts
  // and session entity types to be updated, which in turn might affect
  // results of future queries.
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [AnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent]
  // instead of `DetectIntent`. `AnalyzeContent` has additional
  // functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/{session=projects/*/agent/sessions/*}:detectIntent"
      body: "*"
      additional_bindings {
        post: "/v2beta1/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2beta1/{session=projects/*/locations/*/agent/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2beta1/{session=projects/*/locations/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "session,query_input";
  }

  // Processes a natural language query in audio format in a streaming fashion
  // and returns structured, actionable data as a result. This method is only
  // available via the gRPC API (not REST).
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent]
  // instead of `StreamingDetectIntent`. `StreamingAnalyzeContent` has
  // additional functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
      returns (stream StreamingDetectIntentResponse) {}
}
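
// Example: a minimal sketch of calling `Sessions.DetectIntent` with a text
// query, assuming the published `google-cloud-dialogflow` Python client
// (module `dialogflow_v2beta1`); the project, session, and language values
// below are placeholders.
//
// ```
// from google.cloud import dialogflow_v2beta1 as dialogflow
//
// client = dialogflow.SessionsClient()
// # projects/<Project ID>/agent/sessions/<Session ID>
// session = client.session_path("my-project", "my-session-id")
//
// query_input = dialogflow.QueryInput(
//     text=dialogflow.TextInput(text="book a room", language_code="en-US")
// )
// response = client.detect_intent(
//     request={"session": session, "query_input": query_input}
// )
// print(response.query_result.intent.display_name)
// print(response.query_result.fulfillment_text)
// ```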

// The request to detect the user's intent.
message DetectIntentRequest {
  // Required. The name of the session this query is sent to. Supported formats:
  // - `projects/<Project ID>/agent/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location ID>/agent/sessions/<Session
  //   ID>`,
  // - `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  //   ID>/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location
  //   ID>/agent/environments/<Environment ID>/users/<User ID>/sessions/<Session
  //   ID>`,
  //
  // If `Location ID` is not specified, we assume the default 'us' location. If
  // `Environment ID` is not specified, we assume the default 'draft' environment
  // (`Environment ID` might be referred to as environment name in some places).
  // If `User ID` is not specified, we use "-". It's up to the API caller
  // to choose an appropriate `Session ID` and `User ID`. They can be a random
  // number or some type of user and session identifiers (preferably hashed).
  // The length of the `Session ID` and `User ID` must not exceed 36 characters.
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1. an audio config which instructs the speech recognizer how to process
  // the speech audio,
  //
  // 2. a conversational query in the form of text, or
  //
  // 3. an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 4;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2beta1.DetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at agent-level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2beta1.DetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The natural language speech audio to be processed. This field
  // should be populated if and only if `query_input` is set to an input audio
  // config. A single request can contain up to 1 minute of speech audio data.
  bytes input_audio = 5;
}
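
// Example: a minimal sketch of building an audio-based `DetectIntentRequest`,
// again assuming the `google-cloud-dialogflow` Python client; the encoding,
// sample rate, file name, and IDs are illustrative placeholders.
//
// ```
// from google.cloud import dialogflow_v2beta1 as dialogflow
//
// client = dialogflow.SessionsClient()
// session = client.session_path("my-project", "my-session-id")
//
// audio_config = dialogflow.InputAudioConfig(
//     audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//     sample_rate_hertz=16000,
//     language_code="en-US",
// )
// with open("utterance.wav", "rb") as f:
//     audio_bytes = f.read()  # at most 1 minute of audio
//
// response = client.detect_intent(
//     request={
//         "session": session,
//         "query_input": dialogflow.QueryInput(audio_config=audio_config),
//         "input_audio": audio_bytes,
//     }
// )
// print(response.query_result.query_text)  # the speech recognition result
// ```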

// The message returned from the DetectIntent method.
message DetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 2;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in query_result. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 5;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 3;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 4;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Represents the parameters of the conversational query.
message QueryParameters {
  // The time zone of this conversational query from the
  // [time zone database](https://www.iana.org/time-zones), e.g.,
  // America/New_York, Europe/Paris. If not provided, the time zone specified in
  // agent settings is used.
  string time_zone = 1;

  // The geo location of this conversational query.
  google.type.LatLng geo_location = 2;

  // The collection of contexts to be activated before this query is
  // executed.
  repeated Context contexts = 3;

  // Specifies whether to delete all contexts in the current session
  // before the new ones are activated.
  bool reset_contexts = 4;

  // Additional session entity types to replace or extend developer
  // entity types with. The entity synonyms apply to all languages and persist
  // for the session of this query.
  repeated SessionEntityType session_entity_types = 5;

  // This field can be used to pass custom data to your webhook.
  // Arbitrary JSON objects are supported.
  // If supplied, the value is used to populate the
  // `WebhookRequest.original_detect_intent_request.payload`
  // field sent to your webhook.
  google.protobuf.Struct payload = 6;

  // KnowledgeBases to get alternative results from. If not set, the
  // KnowledgeBases enabled in the agent (through UI) will be used.
  // Format:  `projects/<Project ID>/knowledgeBases/<Knowledge Base ID>`.
  repeated string knowledge_base_names = 12;

  // Configures the type of sentiment analysis to perform. If not
  // provided, sentiment analysis is not performed.
  // Note: Sentiment Analysis is only currently available for Essentials Edition
  // agents.
  SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;

  // For a mega agent query, directly specify which sub agents to query.
  // If any specified sub agent is not linked to the mega agent, an error will
  // be returned. If empty, Dialogflow will decide which sub agents to query.
  // If specified for a non-mega-agent query, it will be silently ignored.
  repeated SubAgent sub_agents = 13;

  // This field can be used to pass HTTP headers for a webhook
  // call. These headers will be sent to the webhook along with the headers that
  // have been configured through the Dialogflow web console. The headers defined
  // within this field will overwrite the headers configured through the Dialogflow
  // console if there is a conflict. Header names are case-insensitive.
  // Google's specified headers are not allowed, including "Host",
271  // "Content-Length", "Connection", "From", "User-Agent", "Accept-Encoding",
272  // "If-Modified-Since", "If-None-Match", "X-Forwarded-For", etc.
273  map<string, string> webhook_headers = 14;
274
275  // The platform of the virtual agent response messages.
276  //
277  // If not empty, only emits messages from this platform in the response.
278  // Valid values are the enum names of
279  // [platform][google.cloud.dialogflow.v2beta1.Intent.Message.platform].
280  string platform = 18;
281}
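
// Example: a minimal sketch of attaching `QueryParameters` to a detect-intent
// call, assuming the `google-cloud-dialogflow` Python client; the time zone,
// payload key, and IDs are placeholders.
//
// ```
// from google.cloud import dialogflow_v2beta1 as dialogflow
// from google.protobuf import struct_pb2
//
// # Forwarded to the webhook as original_detect_intent_request.payload.
// payload = struct_pb2.Struct()
// payload.update({"caller_id": "12345"})
//
// query_params = dialogflow.QueryParameters(
//     time_zone="America/New_York",
//     payload=payload,
//     sentiment_analysis_request_config=dialogflow.SentimentAnalysisRequestConfig(
//         analyze_query_text_sentiment=True
//     ),
// )
//
// client = dialogflow.SessionsClient()
// response = client.detect_intent(
//     request={
//         "session": client.session_path("my-project", "my-session-id"),
//         "query_input": dialogflow.QueryInput(
//             text=dialogflow.TextInput(text="hi", language_code="en-US")
//         ),
//         "query_params": query_params,
//     }
// )
// ```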

// Represents the query input. It can contain either:
//
// 1. An audio config which instructs the speech recognizer how to process the
// speech audio.
//
// 2. A conversational query in the form of text.
//
// 3. An event that specifies which intent to trigger.
message QueryInput {
  // Required. The input specification.
  oneof input {
    // Instructs the speech recognizer how to process the speech audio.
    InputAudioConfig audio_config = 1;

    // The natural language text to be processed.
    TextInput text = 2;

    // The event to be processed.
    EventInput event = 3;

    // The DTMF digits used to invoke an intent and fill in parameter values.
    TelephonyDtmfEvents dtmf = 4;
  }
}

// Represents the result of conversational query or event processing.
message QueryResult {
  // The original conversational query text:
  //
  // - If natural language text was provided as input, `query_text` contains
  //   a copy of the input.
  // - If natural language speech audio was provided as input, `query_text`
  //   contains the speech recognition result. If speech recognizer produced
  //   multiple alternatives, a particular one is picked.
  // - If automatic spell correction is enabled, `query_text` will contain the
  //   corrected user input.
  string query_text = 1;

  // The language that was triggered during intent detection.
  // See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes.
  string language_code = 15;

  // The Speech recognition confidence between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. The default of 0.0 is a sentinel value indicating that confidence
  // was not set.
  //
  // This field is not guaranteed to be accurate or set. In particular this
  // field isn't set for StreamingDetectIntent since the streaming endpoint has
  // separate confidence estimates per portion of the audio in
  // StreamingRecognitionResult.
  float speech_recognition_confidence = 2;

  // The action name from the matched intent.
  string action = 3;

  // The collection of extracted parameters.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // * MapKey type: string
  // * MapKey value: parameter name
  // * MapValue type: If parameter's entity type is a composite entity then use
  // map, otherwise, depending on the parameter value type, it could be one of
  // string, number, boolean, null, list or map.
  // * MapValue value: If parameter's entity type is a composite entity then use
  // map from composite entity property names to property values, otherwise,
  // use parameter value.
  google.protobuf.Struct parameters = 4;

  // This field is set to:
  //
  // - `false` if the matched intent has required parameters and not all of
  //    the required parameter values have been collected.
  // - `true` if all required parameter values have been collected, or if the
  //    matched intent doesn't contain any required parameters.
  bool all_required_params_present = 5;

  // Indicates whether the conversational query triggers a cancellation for slot
  // filling. For more information, see the [cancel slot filling
  // documentation](https://cloud.google.com/dialogflow/es/docs/intents-actions-parameters#cancel).
  bool cancels_slot_filling = 21;

  // The text to be pronounced to the user or shown on the screen.
  // Note: This is a legacy field; `fulfillment_messages` should be preferred.
  string fulfillment_text = 6;

  // The collection of rich messages to present to the user.
  repeated Intent.Message fulfillment_messages = 7;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `source` field returned in the webhook response.
  string webhook_source = 8;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `payload` field returned in the webhook response.
  google.protobuf.Struct webhook_payload = 9;

  // The collection of output contexts. If applicable,
  // `output_contexts.parameters` contains entries with name
  // `<parameter name>.original` containing the original parameter values
  // before the query.
  repeated Context output_contexts = 10;

  // The intent that matched the conversational query. Some, but not all,
  // fields are filled in this message, including but not limited to:
  // `name`, `display_name`, `end_interaction` and `is_fallback`.
  Intent intent = 11;

  // The intent detection confidence. Values range from 0.0
  // (completely uncertain) to 1.0 (completely certain).
  // This value is for informational purposes only and is only used to
  // help match the best intent within the classification threshold.
  // This value may change for the same end-user expression at any time due to a
  // model retraining or change in implementation.
  // If there are multiple `knowledge_answers` messages, this value is set to
  // the greatest `knowledgeAnswers.match_confidence` value in the list.
  float intent_detection_confidence = 12;

  // Free-form diagnostic information for the associated detect intent request.
  // The fields of this data can change without notice, so you should not write
  // code that depends on its structure.
  // The data may contain:
  //
  // - webhook call latency
  // - webhook errors
  google.protobuf.Struct diagnostic_info = 14;

  // The sentiment analysis result, which depends on the
  // `sentiment_analysis_request_config` specified in the request.
  SentimentAnalysisResult sentiment_analysis_result = 17;

  // The result from Knowledge Connector (if any), ordered by decreasing
  // `KnowledgeAnswers.match_confidence`.
  KnowledgeAnswers knowledge_answers = 18;
}
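
// Example: a minimal sketch of inspecting a `QueryResult` returned by
// `DetectIntent`, assuming the `google-cloud-dialogflow` Python client; the
// `response` object is taken from the earlier detect-intent sketches.
//
// ```
// result = response.query_result
//
// print("Query text:", result.query_text)
// print("Matched intent:", result.intent.display_name)
// print("Confidence:", result.intent_detection_confidence)
// print("Fulfillment text:", result.fulfillment_text)
//
// # `parameters` is a google.protobuf.Struct; it behaves like a mapping here.
// for name, value in result.parameters.items():
//     print(f"Parameter {name} = {value}")
//
// if not result.all_required_params_present:
//     print("Slot filling is still in progress.")
// ```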

// Represents the result of querying a Knowledge base.
message KnowledgeAnswers {
  // An answer from Knowledge Connector.
  message Answer {
    // Represents the system's confidence that this knowledge answer is a good
    // match for this conversational query.
    enum MatchConfidenceLevel {
      // Not specified.
      MATCH_CONFIDENCE_LEVEL_UNSPECIFIED = 0;

      // Indicates that the confidence is low.
      LOW = 1;

      // Indicates that the confidence is medium.
      MEDIUM = 2;

      // Indicates that the confidence is high.
      HIGH = 3;
    }

    // Indicates which Knowledge Document this answer was extracted from.
    // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base
    // ID>/documents/<Document ID>`.
    string source = 1 [(google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Document"
    }];

    // The corresponding FAQ question if the answer was extracted from a FAQ
    // Document, empty otherwise.
    string faq_question = 2;

    // The piece of text from the `source` knowledge base document that answers
    // this conversational query.
    string answer = 3;

    // The system's confidence level that this knowledge answer is a good match
    // for this conversational query.
    // NOTE: The confidence level for a given `<query, answer>` pair may change
    // without notice, as it depends on models that are constantly being
    // improved. However, it will change less frequently than the confidence
    // score below, and should be preferred for referencing the quality of an
    // answer.
    MatchConfidenceLevel match_confidence_level = 4;

    // The system's confidence score that this Knowledge answer is a good match
    // for this conversational query.
    // The range is from 0.0 (completely uncertain) to 1.0 (completely certain).
    // Note: The confidence score is likely to vary somewhat (possibly even for
    // identical requests), as the underlying model is under constant
    // improvement. It may be deprecated in the future. We recommend using
    // `match_confidence_level` which should be generally more stable.
    float match_confidence = 5;
  }

  // A list of answers from Knowledge Connector.
  repeated Answer answers = 1;
}
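
// Example: a minimal sketch of reading Knowledge Connector answers from a
// query result, assuming the `google-cloud-dialogflow` Python client and a
// `response` from one of the detect-intent sketches above; filtering on the
// HIGH confidence level is only illustrative.
//
// ```
// from google.cloud import dialogflow_v2beta1 as dialogflow
//
// HIGH = dialogflow.KnowledgeAnswers.Answer.MatchConfidenceLevel.HIGH
//
// for answer in response.query_result.knowledge_answers.answers:
//     if answer.match_confidence_level == HIGH:
//         print("Answer:", answer.answer)
//         print("Source document:", answer.source)
//         if answer.faq_question:
//             print("FAQ question:", answer.faq_question)
// ```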

// The top-level message sent by the client to the
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2beta1.Sessions.StreamingDetectIntent]
// method.
//
// Multiple request messages should be sent in order:
//
// 1.  The first message must contain
// [session][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.session],
//     [query_input][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_input]
//     plus optionally
//     [query_params][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_params].
//     If the client wants to receive an audio response, it should also contain
//     [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config].
//     The message must not contain
//     [input_audio][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.input_audio].
// 2.  If
// [query_input][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_input]
// was set to
//     [query_input.audio_config][google.cloud.dialogflow.v2beta1.InputAudioConfig],
//     all subsequent messages must contain
//     [input_audio][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.input_audio]
//     to continue with Speech recognition. If you decide to detect an
//     intent from text input instead after you have already started Speech
//     recognition, send a message with
//     [query_input.text][google.cloud.dialogflow.v2beta1.QueryInput.text].
//
//     However, note that:
//
//     * Dialogflow will bill you for the audio duration so far.
//     * Dialogflow discards all Speech recognition results in favor of the
//       input text.
//     * Dialogflow will use the language code from the first message.
//
// After you have sent all input, you must half-close or abort the request stream.
message StreamingDetectIntentRequest {
  // Required. The name of the session the query is sent to.
  // Supported formats:
  // - `projects/<Project ID>/agent/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location ID>/agent/sessions/<Session
  //   ID>`,
  // - `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  //   ID>/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location
  //   ID>/agent/environments/<Environment ID>/users/<User ID>/sessions/<Session
  //   ID>`,
  //
  // If `Location ID` is not specified, we assume the default 'us' location. If
  // `Environment ID` is not specified, we assume the default 'draft' environment.
  // If `User ID` is not specified, we use "-". It's up to the API caller
  // to choose an appropriate `Session ID` and `User ID`. They can be a random
  // number or some type of user and session identifiers (preferably hashed).
  // The length of the `Session ID` and `User ID` must not exceed 36 characters.
  //
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1. an audio config which instructs the speech recognizer how to process
  // the speech audio,
  //
  // 2. a conversational query in the form of text, or
  //
  // 3. an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // DEPRECATED. Please use
  // [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2beta1.InputAudioConfig.single_utterance]
  // instead. If `false` (default), recognition does not cease until the client
  // closes the stream. If `true`, the recognizer will detect a single spoken
  // utterance in input audio. Recognition ceases when it detects the audio's
  // voice has stopped or paused. In this case, once a detected intent is
  // received, the client should close the stream and start a new request with a
  // new stream as needed. This setting is ignored when `query_input` is a piece
  // of text or an event.
  bool single_utterance = 4 [deprecated = true];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and agent-level speech synthesizer is not
  // configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 5;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at agent-level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The input audio content to be recognized. Must be sent if
  // `query_input` was set to a streaming input audio config. The complete audio
  // over all streaming messages must not exceed 1 minute.
  bytes input_audio = 6;

  // If true, `StreamingDetectIntentResponse.debugging_info` will get populated.
  bool enable_debugging_info = 8;
}
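
// Example: a minimal sketch of the request ordering described above (one
// config message, then audio chunks), assuming the `google-cloud-dialogflow`
// Python client; the audio file, chunk size, and IDs are placeholders.
//
// ```
// from google.cloud import dialogflow_v2beta1 as dialogflow
//
// client = dialogflow.SessionsClient()
// session = client.session_path("my-project", "my-session-id")
//
// def request_generator(audio_path, chunk_size=4096):
//     # First message: session + query_input (audio config), no input_audio.
//     audio_config = dialogflow.InputAudioConfig(
//         audio_encoding=dialogflow.AudioEncoding.AUDIO_ENCODING_LINEAR_16,
//         sample_rate_hertz=16000,
//         language_code="en-US",
//     )
//     yield dialogflow.StreamingDetectIntentRequest(
//         session=session,
//         query_input=dialogflow.QueryInput(audio_config=audio_config),
//     )
//     # Subsequent messages: audio chunks only.
//     with open(audio_path, "rb") as audio_file:
//         while chunk := audio_file.read(chunk_size):
//             yield dialogflow.StreamingDetectIntentRequest(input_audio=chunk)
//
// responses = client.streaming_detect_intent(
//     requests=request_generator("utterance.wav")
// )
// ```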

// Cloud conversation info for easier debugging.
// It will get populated in `StreamingDetectIntentResponse` or
// `StreamingAnalyzeContentResponse` when the flag `enable_debugging_info` is
// set to true in corresponding requests.
message CloudConversationDebuggingInfo {
  // Number of input audio data chunks in streaming requests.
  int32 audio_data_chunks = 1;

  // Time offset of the end of speech utterance relative to the
  // beginning of the first audio chunk.
  google.protobuf.Duration result_end_time_offset = 2;

  // Duration of first audio chunk.
  google.protobuf.Duration first_audio_duration = 3;

  // Whether client used single utterance mode.
  bool single_utterance = 5;

  // Time offsets of the speech partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration speech_partial_results_end_times = 6;

  // Time offsets of the speech final results (is_final=true) relative to the
  // beginning of the stream.
  repeated google.protobuf.Duration speech_final_results_end_times = 7;

  // Total number of partial responses.
  int32 partial_responses = 8;

  // Time offset of Speaker ID stream close time relative to the Speech stream
  // close time in milliseconds. Only meaningful for conversations involving
  // passive verification.
  int32 speaker_id_passive_latency_ms_offset = 9;

  // Whether a barge-in event is triggered in this request.
  bool bargein_event_triggered = 10;

  // Whether speech uses single utterance mode.
  bool speech_single_utterance = 11;

  // Time offsets of the DTMF partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_partial_results_times = 12;

  // Time offsets of the DTMF final results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_final_results_times = 13;

  // Time offset of the end-of-single-utterance signal relative to the
  // beginning of the stream.
  google.protobuf.Duration single_utterance_end_time_offset = 14;

  // No speech timeout settings for the stream.
  google.protobuf.Duration no_speech_timeout = 15;

  // Speech endpointing timeout settings for the stream.
  google.protobuf.Duration endpointing_timeout = 19;

  // Whether the streaming terminates with an injected text query.
  bool is_input_text = 16;

  // Client half close time in terms of input audio duration.
  google.protobuf.Duration client_half_close_time_offset = 17;

  // Client half close time in terms of API streaming duration.
  google.protobuf.Duration client_half_close_streaming_time_offset = 18;
}

// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1.  If the `StreamingDetectIntentRequest.input_audio` field was
//     set, the `recognition_result` field is populated for one
//     or more messages.
//     See the
//     [StreamingRecognitionResult][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult]
//     message for details about the result message sequence.
//
// 2.  The next message contains `response_id`, `query_result`,
//     `alternative_query_results` and optionally `webhook_status` if a WebHook
//     was called.
//
// 3.  If `output_audio_config` was specified in the request or agent-level
//     speech synthesizer is configured, all subsequent messages contain
//     `output_audio` and `output_audio_config`.
message StreamingDetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The result of speech recognition.
  StreamingRecognitionResult recognition_result = 2;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 3;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in query_result. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 7;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 4;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default platform
  // text responses found in the `query_result.fulfillment_messages` field. If
  // multiple default text responses exist, they will be concatenated when
  // generating audio. If no default platform text responses exist, the
  // generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 5;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;

  // Debugging info that would get populated when
  // `StreamingDetectIntentRequest.enable_debugging_info` is set to true.
  CloudConversationDebuggingInfo debugging_info = 8;
}
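
// Example: a minimal sketch of consuming the response stream, assuming the
// `google-cloud-dialogflow` Python client, the `responses` iterator from the
// request-generator sketch after `StreamingDetectIntentRequest`, and that no
// output audio was requested (so the final message carries the query result).
//
// ```
// for response in responses:
//     # Interim and final transcripts arrive while audio is being recognized.
//     print("Transcript:", response.recognition_result.transcript)
//
// # With no output audio requested, the last response holds the query result.
// query_result = response.query_result
// print("Query text:", query_result.query_text)
// print("Matched intent:", query_result.intent.display_name)
// ```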

// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// While end-user audio is being processed, Dialogflow sends a series of
// results. Each result may contain a `transcript` value. A transcript
// represents a portion of the utterance. While the recognizer is processing
// audio, transcript values may be interim values or finalized values.
// Once a transcript is finalized, the `is_final` value is set to true and
// processing continues for the next transcript.
//
// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
// was true, and the recognizer has completed processing audio,
// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
// following (last) result contains the last finalized transcript.
//
// The complete end-user utterance is determined by concatenating the
// finalized transcript values received for the series of results.
//
// In the following example, single utterance is enabled. In the case where
// single utterance is not enabled, result 7 would not occur.
//
// ```
// Num | transcript              | message_type            | is_final
// --- | ----------------------- | ----------------------- | --------
// 1   | "tube"                  | TRANSCRIPT              | false
// 2   | "to be a"               | TRANSCRIPT              | false
// 3   | "to be"                 | TRANSCRIPT              | false
// 4   | "to be or not to be"    | TRANSCRIPT              | true
// 5   | "that's"                | TRANSCRIPT              | false
758// 6   | "that is                | TRANSCRIPT              | false
// 7   | unset                   | END_OF_SINGLE_UTTERANCE | unset
// 8   | " that is the question" | TRANSCRIPT              | true
// ```
// Concatenating the finalized transcripts with `is_final` set to true,
// the complete utterance becomes "to be or not to be that is the question".
message StreamingRecognitionResult {
  // Type of the response message.
  enum MessageType {
    // Not specified. Should never be used.
    MESSAGE_TYPE_UNSPECIFIED = 0;

    // Message contains a (possibly partial) transcript.
    TRANSCRIPT = 1;

    // Message contains DTMF digits.
    DTMF_DIGITS = 3;

    // This event indicates that the server has detected the end of the user's
    // speech utterance and expects no additional speech. Therefore, the server
    // will not process additional audio (although it may subsequently return
    // additional results). The client should stop sending additional audio
    // data, half-close the gRPC connection, and wait for any additional results
    // until the server closes the gRPC connection. This message is only sent if
    // `single_utterance` was set to `true`, and is not used otherwise.
    END_OF_SINGLE_UTTERANCE = 2;

    // Message contains DTMF digits. Before a message with DTMF_DIGITS is sent,
    // a message with PARTIAL_DTMF_DIGITS may be sent with DTMF digits collected
    // up to the time of sending, which represents an intermediate result.
    PARTIAL_DTMF_DIGITS = 4;
  }

  // Type of the result message.
  MessageType message_type = 1;

  // Transcript text representing the words that the user spoke.
  // Populated if and only if `message_type` = `TRANSCRIPT`.
  string transcript = 2;

  // If `false`, the `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, the recognizer will not return
  // any further hypotheses about this piece of the audio. May only be populated
  // for `message_type` = `TRANSCRIPT`.
  bool is_final = 3;

  // The Speech confidence between 0.0 and 1.0 for the current portion of audio.
  // A higher number indicates an estimated greater likelihood that the
  // recognized words are correct. The default of 0.0 is a sentinel value
  // indicating that confidence was not set.
  //
  // This field is typically only provided if `is_final` is true and you should
  // not rely on it being accurate or even set.
  float confidence = 4;

  // An estimate of the likelihood that the speech recognizer will
  // not change its guess about this interim recognition result:
  //
  // * If the value is unspecified or 0.0, Dialogflow didn't compute the
  //   stability. In particular, Dialogflow will only provide stability for
  //   `TRANSCRIPT` results with `is_final = false`.
  // * Otherwise, the value is in (0.0, 1.0] where 0.0 means completely
  //   unstable and 1.0 means completely stable.
  float stability = 6;

  // Word-specific information for the words recognized by Speech in
  // [transcript][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult.transcript].
  // Populated if and only if `message_type` = `TRANSCRIPT` and
  // [InputAudioConfig.enable_word_info] is set.
  repeated SpeechWordInfo speech_word_info = 7;

  // Time offset of the end of this Speech recognition result relative to the
  // beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`.
  google.protobuf.Duration speech_end_offset = 8;

  // Detected language code for the transcript.
  string language_code = 10;

  // DTMF digits. Populated if and only if `message_type` = `DTMF_DIGITS`.
  TelephonyDtmfEvents dtmf_digits = 5;
}
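
// Example: a minimal sketch of assembling the full utterance from streaming
// recognition results, following the table above; it assumes the
// `google-cloud-dialogflow` Python client and a `responses` iterator from
// `StreamingDetectIntent`.
//
// ```
// from google.cloud import dialogflow_v2beta1 as dialogflow
//
// END = dialogflow.StreamingRecognitionResult.MessageType.END_OF_SINGLE_UTTERANCE
//
// final_transcripts = []
// for response in responses:
//     result = response.recognition_result
//     if result.message_type == END:
//         # The server detected the end of the utterance: stop sending audio
//         # (half-close) but keep reading; the last transcript follows.
//         continue
//     if result.is_final:
//         final_transcripts.append(result.transcript)
//
// utterance = "".join(final_transcripts)
// # e.g. "to be or not to be" + " that is the question"
// ```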

// Represents the natural language text to be processed.
message TextInput {
  // Required. The UTF-8 encoded natural language text to be processed.
  // Text length must not exceed 256 characters for virtual agent interactions.
  string text = 1;

  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 2;
}

// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
  // Required. The unique identifier of the event.
  string name = 1;

  // The collection of parameters associated with the event.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // * MapKey type: string
  // * MapKey value: parameter name
  // * MapValue type: If parameter's entity type is a composite entity then use
  // map, otherwise, depending on the parameter value type, it could be one of
  // string, number, boolean, null, list or map.
  // * MapValue value: If parameter's entity type is a composite entity then use
  // map from composite entity property names to property values, otherwise,
  // use parameter value.
  google.protobuf.Struct parameters = 2;

  // Required. The language of this query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  //
  // This field is ignored when used in the context of a
  // [WebhookResponse.followup_event_input][google.cloud.dialogflow.v2beta1.WebhookResponse.followup_event_input]
  // field, because the language was already defined in the originating detect
  // intent request.
  string language_code = 3;
}
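
// Example: a minimal sketch of triggering the `welcome_event` described above
// via `DetectIntent`, assuming the `google-cloud-dialogflow` Python client;
// the event name and parameter follow the comment above and the IDs are
// placeholders.
//
// ```
// from google.cloud import dialogflow_v2beta1 as dialogflow
// from google.protobuf import struct_pb2
//
// parameters = struct_pb2.Struct()
// parameters.update({"name": "Sam"})
//
// event = dialogflow.EventInput(
//     name="welcome_event", parameters=parameters, language_code="en-US"
// )
//
// client = dialogflow.SessionsClient()
// response = client.detect_intent(
//     request={
//         "session": client.session_path("my-project", "my-session-id"),
//         "query_input": dialogflow.QueryInput(event=event),
//     }
// )
// print(response.query_result.fulfillment_text)
// # e.g. "Hello Sam! What can I do for you today?"
// ```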

// Configures the types of sentiment analysis to perform.
message SentimentAnalysisRequestConfig {
  // Instructs the service to perform sentiment analysis on
  // `query_text`. If not provided, sentiment analysis is not performed on
  // `query_text`.
  bool analyze_query_text_sentiment = 1;
}

// The result of sentiment analysis. Sentiment analysis inspects user input
// and identifies the prevailing subjective opinion, especially to determine a
// user's attitude as positive, negative, or neutral.
// For [Participants.DetectIntent][], it needs to be configured in
// [DetectIntentRequest.query_params][google.cloud.dialogflow.v2beta1.DetectIntentRequest.query_params].
// For [Participants.StreamingDetectIntent][], it needs to be configured in
// [StreamingDetectIntentRequest.query_params][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_params].
// And for
// [Participants.AnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent]
// and
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent],
// it needs to be configured in
// [ConversationProfile.human_agent_assistant_config][google.cloud.dialogflow.v2beta1.ConversationProfile.human_agent_assistant_config].
message SentimentAnalysisResult {
  // The sentiment analysis result for `query_text`.
  Sentiment query_text_sentiment = 1;
}

// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text. See:
// https://cloud.google.com/natural-language/docs/basics#interpreting_sentiment_analysis_values
// for how to interpret the result.
message Sentiment {
  // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
  // sentiment).
  float score = 1;

  // A non-negative number in the [0, +inf) range, which represents the absolute
  // magnitude of sentiment, regardless of score (positive or negative).
  float magnitude = 2;
}
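
// Example: a minimal sketch of bucketing a `Sentiment` into positive, negative,
// or neutral, following the interpretation guide linked above; the +/-0.25
// score threshold is an illustrative assumption, not an API-defined value, and
// `response` is assumed to come from a detect-intent call with
// `analyze_query_text_sentiment` enabled.
//
// ```
// def describe_sentiment(sentiment) -> str:
//     """Classify a Sentiment message using an assumed score threshold."""
//     if sentiment.score >= 0.25:
//         label = "positive"
//     elif sentiment.score <= -0.25:
//         label = "negative"
//     else:
//         label = "neutral"
//     return f"{label} (score={sentiment.score:.2f}, magnitude={sentiment.magnitude:.2f})"
//
// result = response.query_result.sentiment_analysis_result
// print(describe_sentiment(result.query_text_sentiment))
// ```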
929