// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dialogflow.v2beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dialogflow/v2beta1/agent.proto";
import "google/cloud/dialogflow/v2beta1/audio_config.proto";
import "google/cloud/dialogflow/v2beta1/context.proto";
import "google/cloud/dialogflow/v2beta1/intent.proto";
import "google/cloud/dialogflow/v2beta1/session_entity_type.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/struct.proto";
import "google/rpc/status.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option csharp_namespace = "Google.Cloud.Dialogflow.V2Beta1";
option go_package = "cloud.google.com/go/dialogflow/apiv2beta1/dialogflowpb;dialogflowpb";
option java_multiple_files = true;
option java_outer_classname = "SessionProto";
option java_package = "com.google.cloud.dialogflow.v2beta1";
option objc_class_prefix = "DF";
option (google.api.resource_definition) = {
  type: "dialogflow.googleapis.com/Session"
  pattern: "projects/{project}/agent/sessions/{session}"
  pattern: "projects/{project}/agent/environments/{environment}/users/{user}/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/sessions/{session}"
  pattern: "projects/{project}/locations/{location}/agent/environments/{environment}/users/{user}/sessions/{session}"
};

// A service used for session interactions.
//
// For more information, see the [API interactions
// guide](https://cloud.google.com/dialogflow/docs/api-overview).
service Sessions {
  option (google.api.default_host) = "dialogflow.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/dialogflow";

  // Processes a natural language query and returns structured, actionable data
  // as a result. This method is not idempotent, because it may cause contexts
  // and session entity types to be updated, which in turn might affect
  // results of future queries.
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [AnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent]
  // instead of `DetectIntent`. `AnalyzeContent` has additional
  // functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  rpc DetectIntent(DetectIntentRequest) returns (DetectIntentResponse) {
    option (google.api.http) = {
      post: "/v2beta1/{session=projects/*/agent/sessions/*}:detectIntent"
      body: "*"
      additional_bindings {
        post: "/v2beta1/{session=projects/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2beta1/{session=projects/*/locations/*/agent/sessions/*}:detectIntent"
        body: "*"
      }
      additional_bindings {
        post: "/v2beta1/{session=projects/*/locations/*/agent/environments/*/users/*/sessions/*}:detectIntent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "session,query_input";
  }

  // Processes a natural language query in audio format in a streaming fashion
  // and returns structured, actionable data as a result. This method is only
  // available via the gRPC API (not REST).
  //
  // If you might use
  // [Agent Assist](https://cloud.google.com/dialogflow/docs/#aa)
  // or other CCAI products now or in the future, consider using
  // [StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent]
  // instead of `StreamingDetectIntent`. `StreamingAnalyzeContent` has
  // additional functionality for Agent Assist and other CCAI products.
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  rpc StreamingDetectIntent(stream StreamingDetectIntentRequest)
      returns (stream StreamingDetectIntentResponse) {}
}
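// Example: the first HTTP binding above maps `DetectIntent` to a REST call of
// the form sketched below. This is illustrative only, not an official sample;
// the project, session ID, and query values are hypothetical, and the JSON
// field names follow the standard proto3 JSON mapping of
// `DetectIntentRequest`.
//
// ```
// POST https://dialogflow.googleapis.com/v2beta1/projects/my-project/agent/sessions/123456789:detectIntent
//
// {
//   "queryInput": {
//     "text": {
//       "text": "book a room for tomorrow",
//       "languageCode": "en-US"
//     }
//   }
// }
// ```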
// The request to detect user's intent.
message DetectIntentRequest {
  // Required. The name of the session this query is sent to. Supported
  // formats:
  // - `projects/<Project ID>/agent/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location ID>/agent/sessions/<Session
  //   ID>`,
  // - `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  //   ID>/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location
  //   ID>/agent/environments/<Environment ID>/users/<User ID>/sessions/<Session
  //   ID>`,
  //
  // If `Location ID` is not specified, we assume the default 'us' location. If
  // `Environment ID` is not specified, we assume the default 'draft'
  // environment (`Environment ID` might be referred to as environment name at
  // some places). If `User ID` is not specified, we use "-". It's up to the
  // API caller to choose an appropriate `Session ID` and `User ID`. They can
  // be a random number or some type of user and session identifiers
  // (preferably hashed). The length of the `Session ID` and `User ID` must
  // not exceed 36 characters.
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1. an audio config which instructs the speech recognizer how to process
  //    the speech audio,
  //
  // 2. a conversational query in the form of text, or
  //
  // 3. an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and the agent-level speech synthesizer is
  // not configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 4;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2beta1.DetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at agent-level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2beta1.DetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The natural language speech audio to be processed. This field
  // should be populated if and only if `query_input` is set to an input audio
  // config. A single request can contain up to 1 minute of speech audio data.
  bytes input_audio = 5;
}
// The message returned from the DetectIntent method.
message DetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 2;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in query_result. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 5;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 3;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default
  // platform text responses found in the `query_result.fulfillment_messages`
  // field. If multiple default text responses exist, they will be
  // concatenated when generating audio. If no default platform text responses
  // exist, the generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 4;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;
}

// Represents the parameters of the conversational query.
message QueryParameters {
  // The time zone of this conversational query from the
  // [time zone database](https://www.iana.org/time-zones), e.g.,
  // America/New_York, Europe/Paris. If not provided, the time zone specified
  // in the agent settings is used.
  string time_zone = 1;

  // The geo location of this conversational query.
  google.type.LatLng geo_location = 2;

  // The collection of contexts to be activated before this query is
  // executed.
  repeated Context contexts = 3;

  // Specifies whether to delete all contexts in the current session
  // before the new ones are activated.
  bool reset_contexts = 4;

  // Additional session entity types to replace or extend developer
  // entity types with. The entity synonyms apply to all languages and persist
  // for the session of this query.
  repeated SessionEntityType session_entity_types = 5;

  // This field can be used to pass custom data to your webhook.
  // Arbitrary JSON objects are supported.
  // If supplied, the value is used to populate the
  // `WebhookRequest.original_detect_intent_request.payload`
  // field sent to your webhook.
  google.protobuf.Struct payload = 6;

  // KnowledgeBases to get alternative results from. If not set, the
  // KnowledgeBases enabled in the agent (through UI) will be used.
  // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base ID>`.
  repeated string knowledge_base_names = 12;

  // Configures the type of sentiment analysis to perform. If not
  // provided, sentiment analysis is not performed.
  // Note: Sentiment Analysis is only currently available for Essentials
  // Edition agents.
  SentimentAnalysisRequestConfig sentiment_analysis_request_config = 10;

  // For a mega agent query, directly specify which sub agents to query.
  // If any specified sub agent is not linked to the mega agent, an error will
  // be returned. If empty, Dialogflow will decide which sub agents to query.
  // If specified for a non-mega-agent query, it will be silently ignored.
  repeated SubAgent sub_agents = 13;

  // This field can be used to pass HTTP headers for a webhook
  // call. These headers will be sent to the webhook along with the headers
  // that have been configured through the Dialogflow web console. The headers
  // defined within this field will overwrite the headers configured through
  // the Dialogflow console if there is a conflict. Header names are
  // case-insensitive. Google's specified headers are not allowed, including:
  // "Host", "Content-Length", "Connection", "From", "User-Agent",
  // "Accept-Encoding", "If-Modified-Since", "If-None-Match",
  // "X-Forwarded-For", etc.
  map<string, string> webhook_headers = 14;

  // The platform of the virtual agent response messages.
  //
  // If not empty, only emits messages from this platform in the response.
  // Valid values are the enum names of
  // [platform][google.cloud.dialogflow.v2beta1.Intent.Message.platform].
  string platform = 18;
}
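// Example: a `QueryParameters` value, in text format, that activates a
// context, passes a custom payload to the webhook, and sets the query time
// zone. This is an illustrative sketch only; the project, session, context
// name, payload keys, and values are hypothetical, and the `contexts` fields
// shown are defined in context.proto.
//
// ```
// time_zone: "America/New_York"
// contexts {
//   name: "projects/my-project/agent/sessions/123456789/contexts/booking-followup"
//   lifespan_count: 2
// }
// payload {
//   fields {
//     key: "loyalty_tier"
//     value { string_value: "gold" }
//   }
// }
// ```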
// Represents the query input. It can contain either:
//
// 1. An audio config which instructs the speech recognizer how to process the
//    speech audio.
//
// 2. A conversational query in the form of text.
//
// 3. An event that specifies which intent to trigger.
message QueryInput {
  // Required. The input specification.
  oneof input {
    // Instructs the speech recognizer how to process the speech audio.
    InputAudioConfig audio_config = 1;

    // The natural language text to be processed.
    TextInput text = 2;

    // The event to be processed.
    EventInput event = 3;

    // The DTMF digits used to invoke intent and fill in parameter value.
    TelephonyDtmfEvents dtmf = 4;
  }
}

// Represents the result of conversational query or event processing.
message QueryResult {
  // The original conversational query text:
  //
  // - If natural language text was provided as input, `query_text` contains
  //   a copy of the input.
  // - If natural language speech audio was provided as input, `query_text`
  //   contains the speech recognition result. If the speech recognizer
  //   produced multiple alternatives, a particular one is picked.
  // - If automatic spell correction is enabled, `query_text` will contain the
  //   corrected user input.
  string query_text = 1;

  // The language that was triggered during intent detection.
  // See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes.
  string language_code = 15;

  // The Speech recognition confidence between 0.0 and 1.0. A higher number
  // indicates an estimated greater likelihood that the recognized words are
  // correct. The default of 0.0 is a sentinel value indicating that confidence
  // was not set.
  //
  // This field is not guaranteed to be accurate or set. In particular, this
  // field isn't set for StreamingDetectIntent since the streaming endpoint has
  // separate confidence estimates per portion of the audio in
  // StreamingRecognitionResult.
  float speech_recognition_confidence = 2;

  // The action name from the matched intent.
  string action = 3;

  // The collection of extracted parameters.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // * MapKey type: string
  // * MapKey value: parameter name
  // * MapValue type: If parameter's entity type is a composite entity then
  //   use map, otherwise, depending on the parameter value type, it could be
  //   one of string, number, boolean, null, list or map.
  // * MapValue value: If parameter's entity type is a composite entity then
  //   use map from composite entity property names to property values,
  //   otherwise, use parameter value.
  //
  // See the illustrative example after this message.
  google.protobuf.Struct parameters = 4;

  // This field is set to:
  //
  // - `false` if the matched intent has required parameters and not all of
  //   the required parameter values have been collected.
  // - `true` if all required parameter values have been collected, or if the
  //   matched intent doesn't contain any required parameters.
  bool all_required_params_present = 5;

  // Indicates whether the conversational query triggers a cancellation for
  // slot filling. For more information, see the [cancel slot filling
  // documentation](https://cloud.google.com/dialogflow/es/docs/intents-actions-parameters#cancel).
  bool cancels_slot_filling = 21;

  // The text to be pronounced to the user or shown on the screen.
  // Note: This is a legacy field, `fulfillment_messages` should be preferred.
  string fulfillment_text = 6;

  // The collection of rich messages to present to the user.
  repeated Intent.Message fulfillment_messages = 7;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `source` field returned in the webhook response.
  string webhook_source = 8;

  // If the query was fulfilled by a webhook call, this field is set to the
  // value of the `payload` field returned in the webhook response.
  google.protobuf.Struct webhook_payload = 9;

  // The collection of output contexts. If applicable,
  // `output_contexts.parameters` contains entries with name
  // `<parameter name>.original` containing the original parameter values
  // before the query.
  repeated Context output_contexts = 10;

  // The intent that matched the conversational query. Some, but not
  // all, fields are filled in this message, including but not limited to:
  // `name`, `display_name`, `end_interaction` and `is_fallback`.
  Intent intent = 11;

  // The intent detection confidence. Values range from 0.0
  // (completely uncertain) to 1.0 (completely certain).
  // This value is for informational purpose only and is only used to
  // help match the best intent within the classification threshold.
  // This value may change for the same end-user expression at any time due to
  // a model retraining or change in implementation.
  // If there are multiple `knowledge_answers` messages, this value is set to
  // the greatest `knowledgeAnswers.match_confidence` value in the list.
  float intent_detection_confidence = 12;

  // Free-form diagnostic information for the associated detect intent request.
  // The fields of this data can change without notice, so you should not write
  // code that depends on its structure.
  // The data may contain:
  //
  // - webhook call latency
  // - webhook errors
  google.protobuf.Struct diagnostic_info = 14;

  // The sentiment analysis result, which depends on the
  // `sentiment_analysis_request_config` specified in the request.
  SentimentAnalysisResult sentiment_analysis_result = 17;

  // The result from Knowledge Connector (if any), ordered by decreasing
  // `KnowledgeAnswers.match_confidence`.
  KnowledgeAnswers knowledge_answers = 18;
}
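// Example: for the `parameters` field of `QueryResult` above, suppose an
// intent defines a parameter `room` backed by a composite entity and a
// parameter `guests` backed by a number entity. The extracted parameters,
// rendered as JSON, could then look like the sketch below. The parameter and
// property names are hypothetical.
//
// ```
// {
//   "room": {
//     "building": "west-wing",
//     "floor": 3
//   },
//   "guests": 2
// }
// ```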
// Represents the result of querying a Knowledge base.
message KnowledgeAnswers {
  // An answer from Knowledge Connector.
  message Answer {
    // Represents the system's confidence that this knowledge answer is a good
    // match for this conversational query.
    enum MatchConfidenceLevel {
      // Not specified.
      MATCH_CONFIDENCE_LEVEL_UNSPECIFIED = 0;

      // Indicates that the confidence is low.
      LOW = 1;

      // Indicates our confidence is medium.
      MEDIUM = 2;

      // Indicates our confidence is high.
      HIGH = 3;
    }

    // Indicates which Knowledge Document this answer was extracted from.
    // Format: `projects/<Project ID>/knowledgeBases/<Knowledge Base
    // ID>/documents/<Document ID>`.
    string source = 1 [(google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Document"
    }];

    // The corresponding FAQ question if the answer was extracted from a FAQ
    // Document, empty otherwise.
    string faq_question = 2;

    // The piece of text from the `source` knowledge base document that answers
    // this conversational query.
    string answer = 3;

    // The system's confidence level that this knowledge answer is a good match
    // for this conversational query.
    // NOTE: The confidence level for a given `<query, answer>` pair may change
    // without notice, as it depends on models that are constantly being
    // improved. However, it will change less frequently than the confidence
    // score below, and should be preferred for referencing the quality of an
    // answer.
    MatchConfidenceLevel match_confidence_level = 4;

    // The system's confidence score that this Knowledge answer is a good match
    // for this conversational query.
    // The range is from 0.0 (completely uncertain) to 1.0 (completely
    // certain).
    // Note: The confidence score is likely to vary somewhat (possibly even for
    // identical requests), as the underlying model is under constant
    // improvement. It may be deprecated in the future. We recommend using
    // `match_confidence_level` which should be generally more stable.
    float match_confidence = 5;
  }

  // A list of answers from Knowledge Connector.
  repeated Answer answers = 1;
}
// The top-level message sent by the client to the
// [Sessions.StreamingDetectIntent][google.cloud.dialogflow.v2beta1.Sessions.StreamingDetectIntent]
// method.
//
// Multiple request messages should be sent in order:
//
// 1. The first message must contain
//    [session][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.session],
//    [query_input][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_input]
//    plus optionally
//    [query_params][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_params].
//    If the client wants to receive an audio response, it should also contain
//    [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config].
//    The message must not contain
//    [input_audio][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.input_audio].
// 2. If
//    [query_input][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_input]
//    was set to
//    [query_input.audio_config][google.cloud.dialogflow.v2beta1.InputAudioConfig],
//    all subsequent messages must contain
//    [input_audio][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.input_audio]
//    to continue with Speech recognition. If you decide to detect an intent
//    from text input instead after you have already started Speech
//    recognition, please send a message with
//    [query_input.text][google.cloud.dialogflow.v2beta1.QueryInput.text].
//
//    However, note that:
//
//    * Dialogflow will bill you for the audio duration so far.
//    * Dialogflow discards all Speech recognition results in favor of the
//      input text.
//    * Dialogflow will use the language code from the first message.
//
// After you have sent all input, you must half-close or abort the request
// stream. (See the illustrative sketch after this message.)
message StreamingDetectIntentRequest {
  // Required. The name of the session the query is sent to.
  // Supported formats:
  // - `projects/<Project ID>/agent/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location ID>/agent/sessions/<Session
  //   ID>`,
  // - `projects/<Project ID>/agent/environments/<Environment ID>/users/<User
  //   ID>/sessions/<Session ID>`,
  // - `projects/<Project ID>/locations/<Location
  //   ID>/agent/environments/<Environment ID>/users/<User ID>/sessions/<Session
  //   ID>`,
  //
  // If `Location ID` is not specified, we assume the default 'us' location. If
  // `Environment ID` is not specified, we assume the default 'draft'
  // environment. If `User ID` is not specified, we use "-". It's up to the
  // API caller to choose an appropriate `Session ID` and `User ID`. They can
  // be a random number or some type of user and session identifiers
  // (preferably hashed). The length of the `Session ID` and `User ID` must
  // not exceed 36 characters.
  //
  // For more information, see the [API interactions
  // guide](https://cloud.google.com/dialogflow/docs/api-overview).
  //
  // Note: Always use agent versions for production traffic.
  // See [Versions and
  // environments](https://cloud.google.com/dialogflow/es/docs/agents-versions).
  string session = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dialogflow.googleapis.com/Session"
    }
  ];

  // The parameters of this query.
  QueryParameters query_params = 2;

  // Required. The input specification. It can be set to:
  //
  // 1. an audio config which instructs the speech recognizer how to process
  //    the speech audio,
  //
  // 2. a conversational query in the form of text, or
  //
  // 3. an event that specifies which intent to trigger.
  QueryInput query_input = 3 [(google.api.field_behavior) = REQUIRED];

  // DEPRECATED. Please use
  // [InputAudioConfig.single_utterance][google.cloud.dialogflow.v2beta1.InputAudioConfig.single_utterance]
  // instead. If `false` (default), recognition does not cease until the client
  // closes the stream. If `true`, the recognizer will detect a single spoken
  // utterance in input audio. Recognition ceases when it detects the audio's
  // voice has stopped or paused. In this case, once a detected intent is
  // received, the client should close the stream and start a new request with
  // a new stream as needed. This setting is ignored when `query_input` is a
  // piece of text or an event.
  bool single_utterance = 4 [deprecated = true];

  // Instructs the speech synthesizer how to generate the output
  // audio. If this field is not set and the agent-level speech synthesizer is
  // not configured, no output audio is generated.
  OutputAudioConfig output_audio_config = 5;

  // Mask for
  // [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config]
  // indicating which settings in this request-level config should override
  // speech synthesizer settings defined at agent-level.
  //
  // If unspecified or empty,
  // [output_audio_config][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.output_audio_config]
  // replaces the agent-level config in its entirety.
  google.protobuf.FieldMask output_audio_config_mask = 7;

  // The input audio content to be recognized. Must be sent if
  // `query_input` was set to a streaming input audio config. The complete
  // audio over all streaming messages must not exceed 1 minute.
  bytes input_audio = 6;

  // If true, `StreamingDetectIntentResponse.debugging_info` will get
  // populated.
  bool enable_debugging_info = 8;
}
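// Example: the ordering rules above, sketched as the sequence of
// `StreamingDetectIntentRequest` messages a client might write to the stream
// for an audio query. This is illustrative only; the session name and values
// are hypothetical, and the `audio_config` fields shown are defined in
// audio_config.proto.
//
// ```
// # Message 1: session and query_input.audio_config, no input_audio.
// session: "projects/my-project/agent/sessions/123456789"
// query_input {
//   audio_config {
//     audio_encoding: AUDIO_ENCODING_LINEAR_16
//     sample_rate_hertz: 16000
//     language_code: "en-US"
//   }
// }
//
// # Message 2: an audio chunk only.
// input_audio: <first chunk of audio bytes>
//
// # Messages 3..N: further audio chunks.
// input_audio: <next chunk of audio bytes>
//
// # Then half-close the request stream and read the responses.
// ```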
// Cloud conversation info for easier debugging.
// It will get populated in `StreamingDetectIntentResponse` or
// `StreamingAnalyzeContentResponse` when the flag `enable_debugging_info` is
// set to true in corresponding requests.
message CloudConversationDebuggingInfo {
  // Number of input audio data chunks in streaming requests.
  int32 audio_data_chunks = 1;

  // Time offset of the end of speech utterance relative to the
  // beginning of the first audio chunk.
  google.protobuf.Duration result_end_time_offset = 2;

  // Duration of first audio chunk.
  google.protobuf.Duration first_audio_duration = 3;

  // Whether client used single utterance mode.
  bool single_utterance = 5;

  // Time offsets of the speech partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration speech_partial_results_end_times = 6;

  // Time offsets of the speech final results (is_final=true) relative to the
  // beginning of the stream.
  repeated google.protobuf.Duration speech_final_results_end_times = 7;

  // Total number of partial responses.
  int32 partial_responses = 8;

  // Time offset of Speaker ID stream close time relative to the Speech stream
  // close time in milliseconds. Only meaningful for conversations involving
  // passive verification.
  int32 speaker_id_passive_latency_ms_offset = 9;

  // Whether a barge-in event is triggered in this request.
  bool bargein_event_triggered = 10;

  // Whether speech uses single utterance mode.
  bool speech_single_utterance = 11;

  // Time offsets of the DTMF partial results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_partial_results_times = 12;

  // Time offsets of the DTMF final results relative to the beginning of
  // the stream.
  repeated google.protobuf.Duration dtmf_final_results_times = 13;

  // Time offset of the end-of-single-utterance signal relative to the
  // beginning of the stream.
  google.protobuf.Duration single_utterance_end_time_offset = 14;

  // No speech timeout settings for the stream.
  google.protobuf.Duration no_speech_timeout = 15;

  // Speech endpointing timeout settings for the stream.
  google.protobuf.Duration endpointing_timeout = 19;

  // Whether the streaming terminates with an injected text query.
  bool is_input_text = 16;

  // Client half close time in terms of input audio duration.
  google.protobuf.Duration client_half_close_time_offset = 17;

  // Client half close time in terms of API streaming duration.
  google.protobuf.Duration client_half_close_streaming_time_offset = 18;
}
// The top-level message returned from the
// `StreamingDetectIntent` method.
//
// Multiple response messages can be returned in order:
//
// 1. If the `StreamingDetectIntentRequest.input_audio` field was
//    set, the `recognition_result` field is populated for one
//    or more messages.
//    See the
//    [StreamingRecognitionResult][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult]
//    message for details about the result message sequence.
//
// 2. The next message contains `response_id`, `query_result`,
//    `alternative_query_results` and optionally `webhook_status` if a WebHook
//    was called.
//
// 3. If `output_audio_config` was specified in the request or an agent-level
//    speech synthesizer is configured, all subsequent messages contain
//    `output_audio` and `output_audio_config`.
//
// (See the illustrative sketch after this message.)
message StreamingDetectIntentResponse {
  // The unique identifier of the response. It can be used to
  // locate a response in the training example set or for reporting issues.
  string response_id = 1;

  // The result of speech recognition.
  StreamingRecognitionResult recognition_result = 2;

  // The selected results of the conversational query or event processing.
  // See `alternative_query_results` for additional potential results.
  QueryResult query_result = 3;

  // If Knowledge Connectors are enabled, there could be more than one result
  // returned for a given query or event, and this field will contain all
  // results except for the top one, which is captured in query_result. The
  // alternative results are ordered by decreasing
  // `QueryResult.intent_detection_confidence`. If Knowledge Connectors are
  // disabled, this field will be empty until multiple responses for regular
  // intents are supported, at which point those additional results will be
  // surfaced here.
  repeated QueryResult alternative_query_results = 7;

  // Specifies the status of the webhook request.
  google.rpc.Status webhook_status = 4;

  // The audio data bytes encoded as specified in the request.
  // Note: The output audio is generated based on the values of default
  // platform text responses found in the `query_result.fulfillment_messages`
  // field. If multiple default text responses exist, they will be
  // concatenated when generating audio. If no default platform text responses
  // exist, the generated audio content will be empty.
  //
  // In some scenarios, multiple output audio fields may be present in the
  // response structure. In these cases, only the top-most-level audio output
  // has content.
  bytes output_audio = 5;

  // The config used by the speech synthesizer to generate the output audio.
  OutputAudioConfig output_audio_config = 6;

  // Debugging info that would get populated when
  // `StreamingDetectIntentRequest.enable_debugging_info` is set to true.
  CloudConversationDebuggingInfo debugging_info = 8;
}
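// Example: the response ordering above, sketched as the stream a client might
// read back for an audio query with output audio enabled. Each group below
// stands for a separate `StreamingDetectIntentResponse` message; this is
// illustrative only, and the counts and field values vary by request.
//
// ```
// # One or more recognition results while the audio is processed:
// recognition_result { message_type: TRANSCRIPT transcript: "book a" is_final: false }
// recognition_result { message_type: TRANSCRIPT transcript: "book a room" is_final: true }
//
// # Then the detect intent result:
// response_id: "..."
// query_result { ... }
//
// # Then, if output audio was requested or configured at agent level:
// output_audio: <synthesized audio bytes>
// output_audio_config { ... }
// ```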
// Contains a speech recognition result corresponding to a portion of the audio
// that is currently being processed or an indication that this is the end
// of the single requested utterance.
//
// While end-user audio is being processed, Dialogflow sends a series of
// results. Each result may contain a `transcript` value. A transcript
// represents a portion of the utterance. While the recognizer is processing
// audio, transcript values may be interim values or finalized values.
// Once a transcript is finalized, the `is_final` value is set to true and
// processing continues for the next transcript.
//
// If `StreamingDetectIntentRequest.query_input.audio_config.single_utterance`
// was true, and the recognizer has completed processing audio,
// the `message_type` value is set to `END_OF_SINGLE_UTTERANCE` and the
// following (last) result contains the last finalized transcript.
//
// The complete end-user utterance is determined by concatenating the
// finalized transcript values received for the series of results.
//
// In the following example, single utterance is enabled. In the case where
// single utterance is not enabled, result 7 would not occur.
//
// ```
// Num | transcript              | message_type            | is_final
// --- | ----------------------- | ----------------------- | --------
// 1   | "tube"                  | TRANSCRIPT              | false
// 2   | "to be a"               | TRANSCRIPT              | false
// 3   | "to be"                 | TRANSCRIPT              | false
// 4   | "to be or not to be"    | TRANSCRIPT              | true
// 5   | "that's"                | TRANSCRIPT              | false
// 6   | "that is"               | TRANSCRIPT              | false
// 7   | unset                   | END_OF_SINGLE_UTTERANCE | unset
// 8   | " that is the question" | TRANSCRIPT              | true
// ```
//
// Concatenating the finalized transcripts with `is_final` set to true,
// the complete utterance becomes "to be or not to be that is the question".
message StreamingRecognitionResult {
  // Type of the response message.
  enum MessageType {
    // Not specified. Should never be used.
    MESSAGE_TYPE_UNSPECIFIED = 0;

    // Message contains a (possibly partial) transcript.
    TRANSCRIPT = 1;

    // Message contains DTMF digits.
    DTMF_DIGITS = 3;

    // This event indicates that the server has detected the end of the user's
    // speech utterance and expects no additional speech. Therefore, the server
    // will not process additional audio (although it may subsequently return
    // additional results). The client should stop sending additional audio
    // data, half-close the gRPC connection, and wait for any additional
    // results until the server closes the gRPC connection. This message is
    // only sent if `single_utterance` was set to `true`, and is not used
    // otherwise.
    END_OF_SINGLE_UTTERANCE = 2;

    // Message contains DTMF digits. Before a message with DTMF_DIGITS is sent,
    // a message with PARTIAL_DTMF_DIGITS may be sent with DTMF digits
    // collected up to the time of sending, which represents an intermediate
    // result.
    PARTIAL_DTMF_DIGITS = 4;
  }

  // Type of the result message.
  MessageType message_type = 1;

  // Transcript text representing the words that the user spoke.
  // Populated if and only if `message_type` = `TRANSCRIPT`.
  string transcript = 2;

  // If `false`, the `StreamingRecognitionResult` represents an
  // interim result that may change. If `true`, the recognizer will not return
  // any further hypotheses about this piece of the audio. May only be
  // populated for `message_type` = `TRANSCRIPT`.
  bool is_final = 3;

  // The Speech confidence between 0.0 and 1.0 for the current portion of
  // audio. A higher number indicates an estimated greater likelihood that the
  // recognized words are correct. The default of 0.0 is a sentinel value
  // indicating that confidence was not set.
  //
  // This field is typically only provided if `is_final` is true and you should
  // not rely on it being accurate or even set.
  float confidence = 4;

  // An estimate of the likelihood that the speech recognizer will
  // not change its guess about this interim recognition result:
  //
  // * If the value is unspecified or 0.0, Dialogflow didn't compute the
  //   stability. In particular, Dialogflow will only provide stability for
  //   `TRANSCRIPT` results with `is_final = false`.
  // * Otherwise, the value is in (0.0, 1.0] where 0.0 means completely
  //   unstable and 1.0 means completely stable.
  float stability = 6;

  // Word-specific information for the words recognized by Speech in
  // [transcript][google.cloud.dialogflow.v2beta1.StreamingRecognitionResult.transcript].
  // Populated if and only if `message_type` = `TRANSCRIPT` and
  // [InputAudioConfig.enable_word_info] is set.
  repeated SpeechWordInfo speech_word_info = 7;

  // Time offset of the end of this Speech recognition result relative to the
  // beginning of the audio. Only populated for `message_type` = `TRANSCRIPT`.
  google.protobuf.Duration speech_end_offset = 8;

  // Detected language code for the transcript.
  string language_code = 10;

  // DTMF digits. Populated if and only if `message_type` = `DTMF_DIGITS`.
  TelephonyDtmfEvents dtmf_digits = 5;
}

// Represents the natural language text to be processed.
message TextInput {
  // Required. The UTF-8 encoded natural language text to be processed.
  // Text length must not exceed 256 characters for virtual agent interactions.
  string text = 1;

  // Required. The language of this conversational query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  string language_code = 2;
}

// Events allow for matching intents by event name instead of the natural
// language input. For instance, input `<event: { name: "welcome_event",
// parameters: { name: "Sam" } }>` can trigger a personalized welcome response.
// The parameter `name` may be used by the agent in the response:
// `"Hello #welcome_event.name! What can I do for you today?"`.
message EventInput {
  // Required. The unique identifier of the event.
  string name = 1;

  // The collection of parameters associated with the event.
  //
  // Depending on your protocol or client library language, this is a
  // map, associative array, symbol table, dictionary, or JSON object
  // composed of a collection of (MapKey, MapValue) pairs:
  //
  // * MapKey type: string
  // * MapKey value: parameter name
  // * MapValue type: If parameter's entity type is a composite entity then
  //   use map, otherwise, depending on the parameter value type, it could be
  //   one of string, number, boolean, null, list or map.
  // * MapValue value: If parameter's entity type is a composite entity then
  //   use map from composite entity property names to property values,
  //   otherwise, use parameter value.
  google.protobuf.Struct parameters = 2;

  // Required. The language of this query. See [Language
  // Support](https://cloud.google.com/dialogflow/docs/reference/language)
  // for a list of the currently supported language codes. Note that queries in
  // the same session do not necessarily need to specify the same language.
  //
  // This field is ignored when used in the context of a
  // [WebhookResponse.followup_event_input][google.cloud.dialogflow.v2beta1.WebhookResponse.followup_event_input]
  // field, because the language was already defined in the originating detect
  // intent request.
  string language_code = 3;
}

// Configures the types of sentiment analysis to perform.
message SentimentAnalysisRequestConfig {
  // Instructs the service to perform sentiment analysis on
  // `query_text`. If not provided, sentiment analysis is not performed on
  // `query_text`.
  bool analyze_query_text_sentiment = 1;
}
// The result of sentiment analysis. Sentiment analysis inspects user input
// and identifies the prevailing subjective opinion, especially to determine a
// user's attitude as positive, negative, or neutral.
// For [Participants.DetectIntent][], it needs to be configured in
// [DetectIntentRequest.query_params][google.cloud.dialogflow.v2beta1.DetectIntentRequest.query_params].
// For [Participants.StreamingDetectIntent][], it needs to be configured in
// [StreamingDetectIntentRequest.query_params][google.cloud.dialogflow.v2beta1.StreamingDetectIntentRequest.query_params].
// And for
// [Participants.AnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.AnalyzeContent]
// and
// [Participants.StreamingAnalyzeContent][google.cloud.dialogflow.v2beta1.Participants.StreamingAnalyzeContent],
// it needs to be configured in
// [ConversationProfile.human_agent_assistant_config][google.cloud.dialogflow.v2beta1.ConversationProfile.human_agent_assistant_config].
message SentimentAnalysisResult {
  // The sentiment analysis result for `query_text`.
  Sentiment query_text_sentiment = 1;
}

// The sentiment, such as positive/negative feeling or association, for a unit
// of analysis, such as the query text. See
// https://cloud.google.com/natural-language/docs/basics#interpreting_sentiment_analysis_values
// for how to interpret the result.
message Sentiment {
  // Sentiment score between -1.0 (negative sentiment) and 1.0 (positive
  // sentiment).
  float score = 1;

  // A non-negative number in the [0, +inf) range, which represents the
  // absolute magnitude of sentiment, regardless of score (positive or
  // negative).
  float magnitude = 2;
}
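// Example: a possible `SentimentAnalysisResult`, in text format, for a mildly
// positive query. The values are hypothetical; see the interpretation guide
// linked above for how to read score and magnitude together.
//
// ```
// query_text_sentiment {
//   score: 0.6
//   magnitude: 0.6
// }
// ```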