1// Copyright 2022 Google LLC 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15syntax = "proto3"; 16 17package google.cloud.discoveryengine.v1alpha; 18 19import "google/api/annotations.proto"; 20import "google/api/client.proto"; 21import "google/api/field_behavior.proto"; 22import "google/api/resource.proto"; 23import "google/cloud/discoveryengine/v1alpha/chunk.proto"; 24import "google/cloud/discoveryengine/v1alpha/common.proto"; 25import "google/cloud/discoveryengine/v1alpha/document.proto"; 26import "google/protobuf/struct.proto"; 27 28option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Alpha"; 29option go_package = "cloud.google.com/go/discoveryengine/apiv1alpha/discoveryenginepb;discoveryenginepb"; 30option java_multiple_files = true; 31option java_outer_classname = "SearchServiceProto"; 32option java_package = "com.google.cloud.discoveryengine.v1alpha"; 33option objc_class_prefix = "DISCOVERYENGINE"; 34option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha"; 35option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha"; 36 37// Service for search. 38service SearchService { 39 option (google.api.default_host) = "discoveryengine.googleapis.com"; 40 option (google.api.oauth_scopes) = 41 "https://www.googleapis.com/auth/cloud-platform"; 42 43 // Performs a search. 44 rpc Search(SearchRequest) returns (SearchResponse) { 45 option (google.api.http) = { 46 post: "/v1alpha/{serving_config=projects/*/locations/*/dataStores/*/servingConfigs/*}:search" 47 body: "*" 48 additional_bindings { 49 post: "/v1alpha/{serving_config=projects/*/locations/*/collections/*/dataStores/*/servingConfigs/*}:search" 50 body: "*" 51 } 52 additional_bindings { 53 post: "/v1alpha/{serving_config=projects/*/locations/*/collections/*/engines/*/servingConfigs/*}:search" 54 body: "*" 55 } 56 }; 57 } 58} 59 60// Request message for 61// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search] 62// method. 63message SearchRequest { 64 // Specifies the image query input. 65 message ImageQuery { 66 oneof image { 67 // Base64 encoded image bytes. Supported image formats: JPEG, PNG, and 68 // BMP. 69 string image_bytes = 1; 70 } 71 } 72 73 // A struct to define data stores to filter on in a search call. 74 message DataStoreSpec { 75 // Required. Full resource name of 76 // [DataStore][google.cloud.discoveryengine.v1alpha.DataStore], such as 77 // `projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}`. 78 string data_store = 1 [ 79 (google.api.field_behavior) = REQUIRED, 80 (google.api.resource_reference) = { 81 type: "discoveryengine.googleapis.com/DataStore" 82 } 83 ]; 84 } 85 86 // A facet specification to perform faceted search. 87 message FacetSpec { 88 // Specifies how a facet is computed. 89 message FacetKey { 90 // Required. Supported textual and numerical facet keys in 91 // [Document][google.cloud.discoveryengine.v1alpha.Document] object, over 92 // which the facet values are computed. Facet key is case-sensitive. 93 string key = 1 [(google.api.field_behavior) = REQUIRED]; 94 95 // Set only if values should be bucketed into intervals. Must be set 96 // for facets with numerical values. Must not be set for facet with text 97 // values. Maximum number of intervals is 30. 98 repeated Interval intervals = 2; 99 100 // Only get facet for the given restricted values. Only supported on 101 // textual fields. For example, suppose "category" has three values 102 // "Action > 2022", "Action > 2021" and "Sci-Fi > 2022". If set 103 // "restricted_values" to "Action > 2022", the "category" facet only 104 // contains "Action > 2022". Only supported on textual fields. Maximum 105 // is 10. 106 repeated string restricted_values = 3; 107 108 // Only get facet values that start with the given string prefix. For 109 // example, suppose "category" has three values "Action > 2022", 110 // "Action > 2021" and "Sci-Fi > 2022". If set "prefixes" to "Action", the 111 // "category" facet only contains "Action > 2022" and "Action > 2021". 112 // Only supported on textual fields. Maximum is 10. 113 repeated string prefixes = 4; 114 115 // Only get facet values that contains the given strings. For example, 116 // suppose "category" has three values "Action > 2022", 117 // "Action > 2021" and "Sci-Fi > 2022". If set "contains" to "2022", the 118 // "category" facet only contains "Action > 2022" and "Sci-Fi > 2022". 119 // Only supported on textual fields. Maximum is 10. 120 repeated string contains = 5; 121 122 // True to make facet keys case insensitive when getting faceting 123 // values with prefixes or contains; false otherwise. 124 bool case_insensitive = 6; 125 126 // The order in which documents are returned. 127 // 128 // Allowed values are: 129 // 130 // * "count desc", which means order by 131 // [SearchResponse.Facet.values.count][google.cloud.discoveryengine.v1alpha.SearchResponse.Facet.FacetValue.count] 132 // descending. 133 // 134 // * "value desc", which means order by 135 // [SearchResponse.Facet.values.value][google.cloud.discoveryengine.v1alpha.SearchResponse.Facet.FacetValue.value] 136 // descending. 137 // Only applies to textual facets. 138 // 139 // If not set, textual values are sorted in [natural 140 // order](https://en.wikipedia.org/wiki/Natural_sort_order); numerical 141 // intervals are sorted in the order given by 142 // [FacetSpec.FacetKey.intervals][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.intervals]. 143 string order_by = 7; 144 } 145 146 // Required. The facet key specification. 147 FacetKey facet_key = 1 [(google.api.field_behavior) = REQUIRED]; 148 149 // Maximum of facet values that should be returned for this facet. If 150 // unspecified, defaults to 20. The maximum allowed value is 300. Values 151 // above 300 are coerced to 300. 152 // 153 // If this field is negative, an `INVALID_ARGUMENT` is returned. 154 int32 limit = 2; 155 156 // List of keys to exclude when faceting. 157 // 158 // 159 // By default, 160 // [FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key] 161 // is not excluded from the filter unless it is listed in this field. 162 // 163 // Listing a facet key in this field allows its values to appear as facet 164 // results, even when they are filtered out of search results. Using this 165 // field does not affect what search results are returned. 166 // 167 // For example, suppose there are 100 documents with the color facet "Red" 168 // and 200 documents with the color facet "Blue". A query containing the 169 // filter "color:ANY("Red")" and having "color" as 170 // [FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key] 171 // would by default return only "Red" documents in the search results, and 172 // also return "Red" with count 100 as the only color facet. Although there 173 // are also blue documents available, "Blue" would not be shown as an 174 // available facet value. 175 // 176 // If "color" is listed in "excludedFilterKeys", then the query returns the 177 // facet values "Red" with count 100 and "Blue" with count 200, because the 178 // "color" key is now excluded from the filter. Because this field doesn't 179 // affect search results, the search results are still correctly filtered to 180 // return only "Red" documents. 181 // 182 // A maximum of 100 values are allowed. Otherwise, an `INVALID_ARGUMENT` 183 // error is returned. 184 repeated string excluded_filter_keys = 3; 185 186 // Enables dynamic position for this facet. If set to true, the position of 187 // this facet among all facets in the response is determined automatically. 188 // If dynamic facets are enabled, it is ordered together. 189 // If set to false, the position of this facet in the 190 // response is the same as in the request, and it is ranked before 191 // the facets with dynamic position enable and all dynamic facets. 192 // 193 // For example, you may always want to have rating facet returned in 194 // the response, but it's not necessarily to always display the rating facet 195 // at the top. In that case, you can set enable_dynamic_position to true so 196 // that the position of rating facet in response is determined 197 // automatically. 198 // 199 // Another example, assuming you have the following facets in the request: 200 // 201 // * "rating", enable_dynamic_position = true 202 // 203 // * "price", enable_dynamic_position = false 204 // 205 // * "brands", enable_dynamic_position = false 206 // 207 // And also you have a dynamic facets enabled, which generates a facet 208 // `gender`. Then the final order of the facets in the response can be 209 // ("price", "brands", "rating", "gender") or ("price", "brands", "gender", 210 // "rating") depends on how API orders "gender" and "rating" facets. 211 // However, notice that "price" and "brands" are always 212 // ranked at first and second position because their enable_dynamic_position 213 // is false. 214 bool enable_dynamic_position = 4; 215 } 216 217 // Boost specification to boost certain documents. 218 message BoostSpec { 219 // Boost applies to documents which match a condition. 220 message ConditionBoostSpec { 221 // Specification for custom ranking based on customer specified attribute 222 // value. It provides more controls for customized ranking than the simple 223 // (condition, boost) combination above. 224 message BoostControlSpec { 225 // The control points used to define the curve. The curve defined 226 // through these control points can only be monotonically increasing 227 // or decreasing(constant values are acceptable). 228 message ControlPoint { 229 // Can be one of: 230 // 1. The numerical field value. 231 // 2. The duration spec for freshness: 232 // The value must be formatted as an XSD `dayTimeDuration` value (a 233 // restricted subset of an ISO 8601 duration value). The pattern for 234 // this is: `[nD][T[nH][nM][nS]]`. 235 string attribute_value = 1; 236 237 // The value between -1 to 1 by which to boost the score if the 238 // attribute_value evaluates to the value specified above. 239 float boost_amount = 2; 240 } 241 242 // The attribute(or function) for which the custom ranking is to be 243 // applied. 244 enum AttributeType { 245 // Unspecified AttributeType. 246 ATTRIBUTE_TYPE_UNSPECIFIED = 0; 247 248 // The value of the numerical field will be used to dynamically update 249 // the boost amount. In this case, the attribute_value (the x value) 250 // of the control point will be the actual value of the numerical 251 // field for which the boost_amount is specified. 252 NUMERICAL = 1; 253 254 // For the freshness use case the attribute value will be the duration 255 // between the current time and the date in the datetime field 256 // specified. The value must be formatted as an XSD `dayTimeDuration` 257 // value (a restricted subset of an ISO 8601 duration value). The 258 // pattern for this is: `[nD][T[nH][nM][nS]]`. 259 // E.g. `5D`, `3DT12H30M`, `T24H`. 260 FRESHNESS = 2; 261 } 262 263 // The interpolation type to be applied. Default will be linear 264 // (Piecewise Linear). 265 enum InterpolationType { 266 // Interpolation type is unspecified. In this case, it defaults to 267 // Linear. 268 INTERPOLATION_TYPE_UNSPECIFIED = 0; 269 270 // Piecewise linear interpolation will be applied. 271 LINEAR = 1; 272 } 273 274 // The name of the field whose value will be used to determine the 275 // boost amount. 276 string field_name = 1; 277 278 // The attribute type to be used to determine the boost amount. The 279 // attribute value can be derived from the field value of the specified 280 // field_name. In the case of numerical it is straightforward i.e. 281 // attribute_value = numerical_field_value. In the case of freshness 282 // however, attribute_value = (time.now() - datetime_field_value). 283 AttributeType attribute_type = 2; 284 285 // The interpolation type to be applied to connect the control points 286 // listed below. 287 InterpolationType interpolation_type = 3; 288 289 // The control points used to define the curve. The monotonic function 290 // (defined through the interpolation_type above) passes through the 291 // control points listed here. 292 repeated ControlPoint control_points = 4; 293 } 294 295 // An expression which specifies a boost condition. The syntax and 296 // supported fields are the same as a filter expression. See 297 // [SearchRequest.filter][google.cloud.discoveryengine.v1alpha.SearchRequest.filter] 298 // for detail syntax and limitations. 299 // 300 // Examples: 301 // 302 // * To boost documents with document ID "doc_1" or "doc_2", and 303 // color "Red" or "Blue": 304 // `(document_id: ANY("doc_1", "doc_2")) AND (color: ANY("Red", "Blue"))` 305 string condition = 1; 306 307 // Strength of the condition boost, which should be in [-1, 1]. Negative 308 // boost means demotion. Default is 0.0. 309 // 310 // Setting to 1.0 gives the document a big promotion. However, it does 311 // not necessarily mean that the boosted document will be the top result 312 // at all times, nor that other documents will be excluded. Results 313 // could still be shown even when none of them matches the condition. 314 // And results that are significantly more relevant to the search query 315 // can still trump your heavily favored but irrelevant documents. 316 // 317 // Setting to -1.0 gives the document a big demotion. However, results 318 // that are deeply relevant might still be shown. The document will have 319 // an upstream battle to get a fairly high ranking, but it is not 320 // blocked out completely. 321 // 322 // Setting to 0.0 means no boost applied. The boosting condition is 323 // ignored. Only one of the (condition, boost) combination or the 324 // boost_control_spec below are set. If both are set then the global boost 325 // is ignored and the more fine-grained boost_control_spec is applied. 326 float boost = 2; 327 328 // Complex specification for custom ranking based on customer defined 329 // attribute value. 330 BoostControlSpec boost_control_spec = 3; 331 } 332 333 // Condition boost specifications. If a document matches multiple conditions 334 // in the specifictions, boost scores from these specifications are all 335 // applied and combined in a non-linear way. Maximum number of 336 // specifications is 20. 337 repeated ConditionBoostSpec condition_boost_specs = 1; 338 } 339 340 // Specification to determine under which conditions query expansion should 341 // occur. 342 message QueryExpansionSpec { 343 // Enum describing under which condition query expansion should occur. 344 enum Condition { 345 // Unspecified query expansion condition. In this case, server behavior 346 // defaults to 347 // [Condition.DISABLED][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.Condition.DISABLED]. 348 CONDITION_UNSPECIFIED = 0; 349 350 // Disabled query expansion. Only the exact search query is used, even if 351 // [SearchResponse.total_size][google.cloud.discoveryengine.v1alpha.SearchResponse.total_size] 352 // is zero. 353 DISABLED = 1; 354 355 // Automatic query expansion built by the Search API. 356 AUTO = 2; 357 } 358 359 // The condition under which query expansion should occur. Default to 360 // [Condition.DISABLED][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.Condition.DISABLED]. 361 Condition condition = 1; 362 363 // Whether to pin unexpanded results. If this field is set to true, 364 // unexpanded products are always at the top of the search results, followed 365 // by the expanded results. 366 bool pin_unexpanded_results = 2; 367 } 368 369 // The specification for query spell correction. 370 message SpellCorrectionSpec { 371 // Enum describing under which mode spell correction should occur. 372 enum Mode { 373 // Unspecified spell correction mode. In this case, server behavior 374 // defaults to 375 // [Mode.AUTO][google.cloud.discoveryengine.v1alpha.SearchRequest.SpellCorrectionSpec.Mode.AUTO]. 376 MODE_UNSPECIFIED = 0; 377 378 // Search API will try to find a spell suggestion if there 379 // is any and put in the 380 // [SearchResponse.corrected_query][google.cloud.discoveryengine.v1alpha.SearchResponse.corrected_query]. 381 // The spell suggestion will not be used as the search query. 382 SUGGESTION_ONLY = 1; 383 384 // Automatic spell correction built by the Search API. Search will 385 // be based on the corrected query if found. 386 AUTO = 2; 387 } 388 389 // The mode under which spell correction should take effect to 390 // replace the original search query. Default to 391 // [Mode.AUTO][google.cloud.discoveryengine.v1alpha.SearchRequest.SpellCorrectionSpec.Mode.AUTO]. 392 Mode mode = 1; 393 } 394 395 // A specification for configuring the behavior of content search. 396 message ContentSearchSpec { 397 // A specification for configuring snippets in a search response. 398 message SnippetSpec { 399 // [DEPRECATED] This field is deprecated. To control snippet return, use 400 // `return_snippet` field. For backwards compatibility, we will return 401 // snippet if max_snippet_count > 0. 402 int32 max_snippet_count = 1 [deprecated = true]; 403 404 // [DEPRECATED] This field is deprecated and will have no affect on the 405 // snippet. 406 bool reference_only = 2 [deprecated = true]; 407 408 // If `true`, then return snippet. If no snippet can be generated, we 409 // return "No snippet is available for this page." A `snippet_status` with 410 // `SUCCESS` or `NO_SNIPPET_AVAILABLE` will also be returned. 411 bool return_snippet = 3; 412 } 413 414 // A specification for configuring a summary returned in a search 415 // response. 416 message SummarySpec { 417 // Specification of the prompt to use with the model. 418 message ModelPromptSpec { 419 // Text at the beginning of the prompt that instructs the assistant. 420 // Examples are available in the user guide. 421 string preamble = 1; 422 } 423 424 // Specification of the model. 425 message ModelSpec { 426 // The model version used to generate the summary. 427 // 428 // Supported values are: 429 // 430 // * `stable`: string. Default value when no value is specified. Uses a 431 // generally available, fine-tuned model. For more information, see 432 // [Answer generation model versions and 433 // lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models). 434 // * `preview`: string. (Public preview) Uses a preview model. For more 435 // information, see 436 // [Answer generation model versions and 437 // lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models). 438 string version = 1; 439 } 440 441 // The number of top results to generate the summary from. If the number 442 // of results returned is less than `summaryResultCount`, the summary is 443 // generated from all of the results. 444 // 445 // At most 10 results for documents mode, or 50 for chunks mode, can be 446 // used to generate a summary. The chunks mode is used when 447 // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode] 448 // is set to 449 // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS]. 450 int32 summary_result_count = 1; 451 452 // Specifies whether to include citations in the summary. The default 453 // value is `false`. 454 // 455 // When this field is set to `true`, summaries include in-line citation 456 // numbers. 457 // 458 // Example summary including citations: 459 // 460 // BigQuery is Google Cloud's fully managed and completely serverless 461 // enterprise data warehouse [1]. BigQuery supports all data types, works 462 // across clouds, and has built-in machine learning and business 463 // intelligence, all within a unified platform [2, 3]. 464 // 465 // The citation numbers refer to the returned search results and are 466 // 1-indexed. For example, [1] means that the sentence is attributed to 467 // the first search result. [2, 3] means that the sentence is attributed 468 // to both the second and third search results. 469 bool include_citations = 2; 470 471 // Specifies whether to filter out adversarial queries. The default value 472 // is `false`. 473 // 474 // Google employs search-query classification to detect adversarial 475 // queries. No summary is returned if the search query is classified as an 476 // adversarial query. For example, a user might ask a question regarding 477 // negative comments about the company or submit a query designed to 478 // generate unsafe, policy-violating output. If this field is set to 479 // `true`, we skip generating summaries for adversarial queries and return 480 // fallback messages instead. 481 bool ignore_adversarial_query = 3; 482 483 // Specifies whether to filter out queries that are not summary-seeking. 484 // The default value is `false`. 485 // 486 // Google employs search-query classification to detect summary-seeking 487 // queries. No summary is returned if the search query is classified as a 488 // non-summary seeking query. For example, `why is the sky blue` and `Who 489 // is the best soccer player in the world?` are summary-seeking queries, 490 // but `SFO airport` and `world cup 2026` are not. They are most likely 491 // navigational queries. If this field is set to `true`, we skip 492 // generating summaries for non-summary seeking queries and return 493 // fallback messages instead. 494 bool ignore_non_summary_seeking_query = 4; 495 496 // If specified, the spec will be used to modify the prompt provided to 497 // the LLM. 498 ModelPromptSpec model_prompt_spec = 5; 499 500 // Language code for Summary. Use language tags defined by 501 // [BCP47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt). 502 // Note: This is an experimental feature. 503 string language_code = 6; 504 505 // If specified, the spec will be used to modify the model specification 506 // provided to the LLM. 507 ModelSpec model_spec = 7; 508 509 // If true, answer will be generated from most relevant chunks from top 510 // search results. This feature will improve summary quality. 511 // Note that with this feature enabled, not all top search results 512 // will be referenced and included in the reference list, so the citation 513 // source index only points to the search results listed in the reference 514 // list. 515 bool use_semantic_chunks = 8; 516 } 517 518 // A specification for configuring the extractive content in a search 519 // response. 520 message ExtractiveContentSpec { 521 // The maximum number of extractive answers returned in each search 522 // result. 523 // 524 // An extractive answer is a verbatim answer extracted from the original 525 // document, which provides a precise and contextually relevant answer to 526 // the search query. 527 // 528 // If the number of matching answers is less than the 529 // `max_extractive_answer_count`, return all of the answers. Otherwise, 530 // return the `max_extractive_answer_count`. 531 // 532 // At most five answers are returned for each 533 // [SearchResult][google.cloud.discoveryengine.v1alpha.SearchResponse.SearchResult]. 534 int32 max_extractive_answer_count = 1; 535 536 // The max number of extractive segments returned in each search result. 537 // Only applied if the 538 // [DataStore][google.cloud.discoveryengine.v1alpha.DataStore] is set to 539 // [DataStore.ContentConfig.CONTENT_REQUIRED][google.cloud.discoveryengine.v1alpha.DataStore.ContentConfig.CONTENT_REQUIRED] 540 // or 541 // [DataStore.solution_types][google.cloud.discoveryengine.v1alpha.DataStore.solution_types] 542 // is 543 // [SOLUTION_TYPE_CHAT][google.cloud.discoveryengine.v1alpha.SolutionType.SOLUTION_TYPE_CHAT]. 544 // 545 // An extractive segment is a text segment extracted from the original 546 // document that is relevant to the search query, and, in general, more 547 // verbose than an extractive answer. The segment could then be used as 548 // input for LLMs to generate summaries and answers. 549 // 550 // If the number of matching segments is less than 551 // `max_extractive_segment_count`, return all of the segments. Otherwise, 552 // return the `max_extractive_segment_count`. 553 int32 max_extractive_segment_count = 2; 554 555 // Specifies whether to return the confidence score from the extractive 556 // segments in each search result. This feature is available only for new 557 // or allowlisted data stores. To allowlist your data store, 558 // contact your Customer Engineer. The default value is `false`. 559 bool return_extractive_segment_score = 3; 560 561 // Specifies whether to also include the adjacent from each selected 562 // segments. 563 // Return at most `num_previous_segments` segments before each selected 564 // segments. 565 int32 num_previous_segments = 4; 566 567 // Return at most `num_next_segments` segments after each selected 568 // segments. 569 int32 num_next_segments = 5; 570 } 571 572 // Specifies the chunk spec to be returned from the search response. 573 // Only available if the 574 // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode] 575 // is set to 576 // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS] 577 message ChunkSpec { 578 // The number of previous chunks to be returned of the current chunk. The 579 // maximum allowed value is 3. 580 // If not specified, no previous chunks will be returned. 581 int32 num_previous_chunks = 1; 582 583 // The number of next chunks to be returned of the current chunk. The 584 // maximum allowed value is 3. 585 // If not specified, no next chunks will be returned. 586 int32 num_next_chunks = 2; 587 } 588 589 // Specifies the search result mode. If unspecified, the 590 // search result mode is based on 591 // [DataStore.DocumentProcessingConfig.chunking_config][]: 592 // * If [DataStore.DocumentProcessingConfig.chunking_config][] is specified, 593 // it defaults to `CHUNKS`. 594 // * Otherwise, it defaults to `DOCUMENTS`. 595 enum SearchResultMode { 596 // Default value. 597 SEARCH_RESULT_MODE_UNSPECIFIED = 0; 598 599 // Returns documents in the search result. 600 DOCUMENTS = 1; 601 602 // Returns chunks in the search result. Only available if the 603 // [DataStore.DocumentProcessingConfig.chunking_config][] is specified. 604 CHUNKS = 2; 605 } 606 607 // If `snippetSpec` is not specified, snippets are not included in the 608 // search response. 609 SnippetSpec snippet_spec = 1; 610 611 // If `summarySpec` is not specified, summaries are not included in the 612 // search response. 613 SummarySpec summary_spec = 2; 614 615 // If there is no extractive_content_spec provided, there will be no 616 // extractive answer in the search response. 617 ExtractiveContentSpec extractive_content_spec = 3; 618 619 // Specifies the search result mode. If unspecified, the 620 // search result mode is based on 621 // [DataStore.DocumentProcessingConfig.chunking_config][]: 622 // * If [DataStore.DocumentProcessingConfig.chunking_config][] is specified, 623 // it defaults to `CHUNKS`. 624 // * Otherwise, it defaults to `DOCUMENTS`. 625 SearchResultMode search_result_mode = 4; 626 627 // Specifies the chunk spec to be returned from the search response. 628 // Only available if the 629 // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode] 630 // is set to 631 // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS] 632 ChunkSpec chunk_spec = 5; 633 } 634 635 // The specification that uses customized query embedding vector to do 636 // semantic document retrieval. 637 message EmbeddingSpec { 638 // Embedding vector. 639 message EmbeddingVector { 640 // Embedding field path in schema. 641 string field_path = 1; 642 643 // Query embedding vector. 644 repeated float vector = 2; 645 } 646 647 // The embedding vector used for retrieval. Limit to 1. 648 repeated EmbeddingVector embedding_vectors = 1; 649 } 650 651 // Required. The resource name of the Search serving config, such as 652 // `projects/*/locations/global/collections/default_collection/engines/*/servingConfigs/default_serving_config`, 653 // or 654 // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store/servingConfigs/default_serving_config`. 655 // This field is used to identify the serving configuration name, set 656 // of models used to make the search. 657 string serving_config = 1 [ 658 (google.api.field_behavior) = REQUIRED, 659 (google.api.resource_reference) = { 660 type: "discoveryengine.googleapis.com/ServingConfig" 661 } 662 ]; 663 664 // The branch resource name, such as 665 // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store/branches/0`. 666 // 667 // Use `default_branch` as the branch ID or leave this field empty, to search 668 // documents under the default branch. 669 string branch = 2 [(google.api.resource_reference) = { 670 type: "discoveryengine.googleapis.com/Branch" 671 }]; 672 673 // Raw search query. 674 string query = 3; 675 676 // Raw image query. 677 ImageQuery image_query = 19; 678 679 // Maximum number of 680 // [Document][google.cloud.discoveryengine.v1alpha.Document]s to return. The 681 // maximum allowed value depends on the data type. Values above the maximum 682 // value are coerced to the maximum value. 683 // 684 // * Websites with basic indexing: Default `10`, Maximum `25`. 685 // * Websites with advanced indexing: Default `25`, Maximum `50`. 686 // * Other: Default `50`, Maximum `100`. 687 // 688 // If this field is negative, an `INVALID_ARGUMENT` is returned. 689 int32 page_size = 4; 690 691 // A page token received from a previous 692 // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search] 693 // call. Provide this to retrieve the subsequent page. 694 // 695 // When paginating, all other parameters provided to 696 // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search] 697 // must match the call that provided the page token. Otherwise, an 698 // `INVALID_ARGUMENT` error is returned. 699 string page_token = 5; 700 701 // A 0-indexed integer that specifies the current offset (that is, starting 702 // result location, amongst the 703 // [Document][google.cloud.discoveryengine.v1alpha.Document]s deemed by the 704 // API as relevant) in search results. This field is only considered if 705 // [page_token][google.cloud.discoveryengine.v1alpha.SearchRequest.page_token] 706 // is unset. 707 // 708 // If this field is negative, an `INVALID_ARGUMENT` is returned. 709 int32 offset = 6; 710 711 // A list of data store specs to apply on a search call. 712 repeated DataStoreSpec data_store_specs = 32; 713 714 // The filter syntax consists of an expression language for constructing a 715 // predicate from one or more fields of the documents being filtered. Filter 716 // expression is case-sensitive. 717 // 718 // If this field is unrecognizable, an `INVALID_ARGUMENT` is returned. 719 // 720 // Filtering in Vertex AI Search is done by mapping the LHS filter key to a 721 // key property defined in the Vertex AI Search backend -- this mapping is 722 // defined by the customer in their schema. For example a media customer might 723 // have a field 'name' in their schema. In this case the filter would look 724 // like this: filter --> name:'ANY("king kong")' 725 // 726 // For more information about filtering including syntax and filter 727 // operators, see 728 // [Filter](https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata) 729 string filter = 7; 730 731 // The default filter that is applied when a user performs a search without 732 // checking any filters on the search page. 733 // 734 // The filter applied to every search request when quality improvement such as 735 // query expansion is needed. In the case a query does not have a sufficient 736 // amount of results this filter will be used to determine whether or not to 737 // enable the query expansion flow. The original filter will still be used for 738 // the query expanded search. 739 // This field is strongly recommended to achieve high search quality. 740 // 741 // For more information about filter syntax, see 742 // [SearchRequest.filter][google.cloud.discoveryengine.v1alpha.SearchRequest.filter]. 743 string canonical_filter = 29; 744 745 // The order in which documents are returned. Documents can be ordered by 746 // a field in an [Document][google.cloud.discoveryengine.v1alpha.Document] 747 // object. Leave it unset if ordered by relevance. `order_by` expression is 748 // case-sensitive. For more information on ordering, see 749 // [Ordering](https://cloud.google.com/retail/docs/filter-and-order#order) 750 // 751 // If this field is unrecognizable, an `INVALID_ARGUMENT` is returned. 752 string order_by = 8; 753 754 // Information about the end user. 755 // Highly recommended for analytics. 756 // [UserInfo.user_agent][google.cloud.discoveryengine.v1alpha.UserInfo.user_agent] 757 // is used to deduce `device_type` for analytics. 758 UserInfo user_info = 21; 759 760 // Facet specifications for faceted search. If empty, no facets are returned. 761 // 762 // A maximum of 100 values are allowed. Otherwise, an `INVALID_ARGUMENT` 763 // error is returned. 764 repeated FacetSpec facet_specs = 9; 765 766 // Boost specification to boost certain documents. 767 // For more information on boosting, see 768 // [Boosting](https://cloud.google.com/retail/docs/boosting#boost) 769 BoostSpec boost_spec = 10; 770 771 // Additional search parameters. 772 // 773 // For public website search only, supported values are: 774 // 775 // * `user_country_code`: string. Default empty. If set to non-empty, results 776 // are restricted or boosted based on the location provided. 777 // Example: 778 // user_country_code: "au" 779 // 780 // For available codes see [Country 781 // Codes](https://developers.google.com/custom-search/docs/json_api_reference#countryCodes) 782 // 783 // * `search_type`: double. Default empty. Enables non-webpage searching 784 // depending on the value. The only valid non-default value is 1, 785 // which enables image searching. 786 // Example: 787 // search_type: 1 788 map<string, google.protobuf.Value> params = 11; 789 790 // The query expansion specification that specifies the conditions under which 791 // query expansion occurs. 792 QueryExpansionSpec query_expansion_spec = 13; 793 794 // The spell correction specification that specifies the mode under 795 // which spell correction takes effect. 796 SpellCorrectionSpec spell_correction_spec = 14; 797 798 // A unique identifier for tracking visitors. For example, this could be 799 // implemented with an HTTP cookie, which should be able to uniquely identify 800 // a visitor on a single device. This unique identifier should not change if 801 // the visitor logs in or out of the website. 802 // 803 // This field should NOT have a fixed value such as `unknown_visitor`. 804 // 805 // This should be the same identifier as 806 // [UserEvent.user_pseudo_id][google.cloud.discoveryengine.v1alpha.UserEvent.user_pseudo_id] 807 // and 808 // [CompleteQueryRequest.user_pseudo_id][google.cloud.discoveryengine.v1alpha.CompleteQueryRequest.user_pseudo_id] 809 // 810 // The field must be a UTF-8 encoded string with a length limit of 128 811 // characters. Otherwise, an `INVALID_ARGUMENT` error is returned. 812 string user_pseudo_id = 15; 813 814 // A specification for configuring the behavior of content search. 815 ContentSearchSpec content_search_spec = 24; 816 817 // Uses the provided embedding to do additional semantic document retrieval. 818 // The retrieval is based on the dot product of 819 // [SearchRequest.EmbeddingSpec.EmbeddingVector.vector][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.vector] 820 // and the document embedding that is provided in 821 // [SearchRequest.EmbeddingSpec.EmbeddingVector.field_path][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.field_path]. 822 // 823 // If 824 // [SearchRequest.EmbeddingSpec.EmbeddingVector.field_path][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.field_path] 825 // is not provided, it will use 826 // [ServingConfig.EmbeddingConfig.field_path][google.cloud.discoveryengine.v1alpha.ServingConfig.embedding_config]. 827 EmbeddingSpec embedding_spec = 23; 828 829 // The ranking expression controls the customized ranking on retrieval 830 // documents. This overrides 831 // [ServingConfig.ranking_expression][google.cloud.discoveryengine.v1alpha.ServingConfig.ranking_expression]. 832 // The ranking expression is a single function or multiple functions that are 833 // joint by "+". 834 // * ranking_expression = function, { " + ", function }; 835 // Supported functions: 836 // * double * relevance_score 837 // * double * dotProduct(embedding_field_path) 838 // Function variables: 839 // `relevance_score`: pre-defined keywords, used for measure relevance 840 // between query and document. 841 // `embedding_field_path`: the document embedding field 842 // used with query embedding vector. 843 // `dotProduct`: embedding function between embedding_field_path and query 844 // embedding vector. 845 // 846 // Example ranking expression: 847 // If document has an embedding field doc_embedding, the ranking expression 848 // could be `0.5 * relevance_score + 0.3 * dotProduct(doc_embedding)`. 849 string ranking_expression = 26; 850 851 // Whether to turn on safe search. This is only supported for 852 // website search. 853 bool safe_search = 20; 854 855 // The user labels applied to a resource must meet the following requirements: 856 // 857 // * Each resource can have multiple labels, up to a maximum of 64. 858 // * Each label must be a key-value pair. 859 // * Keys have a minimum length of 1 character and a maximum length of 63 860 // characters and cannot be empty. Values can be empty and have a maximum 861 // length of 63 characters. 862 // * Keys and values can contain only lowercase letters, numeric characters, 863 // underscores, and dashes. All characters must use UTF-8 encoding, and 864 // international characters are allowed. 865 // * The key portion of a label must be unique. However, you can use the same 866 // key with multiple resources. 867 // * Keys must start with a lowercase letter or international character. 868 // 869 // See [Google Cloud 870 // Document](https://cloud.google.com/resource-manager/docs/creating-managing-labels#requirements) 871 // for more details. 872 map<string, string> user_labels = 22; 873 874 // Custom fine tuning configs. 875 CustomFineTuningSpec custom_fine_tuning_spec = 34; 876} 877 878// Response message for 879// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search] 880// method. 881message SearchResponse { 882 // Represents the search results. 883 message SearchResult { 884 // [Document.id][google.cloud.discoveryengine.v1alpha.Document.id] of the 885 // searched [Document][google.cloud.discoveryengine.v1alpha.Document]. 886 string id = 1; 887 888 // The document data snippet in the search response. Only fields that are 889 // marked as retrievable are populated. 890 Document document = 2; 891 892 // The chunk data in the search response if the 893 // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode] 894 // is set to 895 // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS]. 896 Chunk chunk = 18; 897 898 // Google provided available scores. 899 map<string, DoubleList> model_scores = 4; 900 } 901 902 // A facet result. 903 message Facet { 904 // A facet value which contains value names and their count. 905 message FacetValue { 906 // A facet value which contains values. 907 oneof facet_value { 908 // Text value of a facet, such as "Black" for facet "colors". 909 string value = 1; 910 911 // Interval value for a facet, such as [10, 20) for facet "price". It 912 // matches 913 // [SearchRequest.FacetSpec.FacetKey.intervals][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.intervals]. 914 Interval interval = 2; 915 } 916 917 // Number of items that have this facet value. 918 int64 count = 3; 919 } 920 921 // The key for this facet. E.g., "colors" or "price". It matches 922 // [SearchRequest.FacetSpec.FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key]. 923 string key = 1; 924 925 // The facet values for this field. 926 repeated FacetValue values = 2; 927 928 // Whether the facet is dynamically generated. 929 bool dynamic_facet = 3; 930 } 931 932 // Guided search result. The guided search helps user to refine the search 933 // results and narrow down to the real needs from a broaded search results. 934 message GuidedSearchResult { 935 // Useful attribute for search result refinements. 936 message RefinementAttribute { 937 // Attribute key used to refine the results e.g. 'movie_type'. 938 string attribute_key = 1; 939 940 // Attribute value used to refine the results e.g. 'drama'. 941 string attribute_value = 2; 942 } 943 944 // A list of ranked refinement attributes. 945 repeated RefinementAttribute refinement_attributes = 1; 946 947 // Suggested follow-up questions. 948 repeated string follow_up_questions = 2; 949 } 950 951 // Summary of the top N search result specified by the summary spec. 952 message Summary { 953 // Safety Attribute categories and their associated confidence scores. 954 message SafetyAttributes { 955 // The display names of Safety Attribute categories associated with the 956 // generated content. Order matches the Scores. 957 repeated string categories = 1; 958 959 // The confidence scores of the each category, higher 960 // value means higher confidence. Order matches the Categories. 961 repeated float scores = 2; 962 } 963 964 // Citation metadata. 965 message CitationMetadata { 966 // Citations for segments. 967 repeated Citation citations = 1; 968 } 969 970 // Citation info for a segment. 971 message Citation { 972 // Index indicates the start of the segment, measured in bytes/unicode. 973 int64 start_index = 1; 974 975 // End of the attributed segment, exclusive. 976 int64 end_index = 2; 977 978 // Citation sources for the attributed segment. 979 repeated CitationSource sources = 3; 980 } 981 982 // Citation source. 983 message CitationSource { 984 // Document reference index from SummaryWithMetadata.references. 985 // It is 0-indexed and the value will be zero if the reference_index is 986 // not set explicitly. 987 int64 reference_index = 4; 988 } 989 990 // Document reference. 991 message Reference { 992 // Chunk content. 993 message ChunkContent { 994 // Chunk textual content. 995 string content = 1; 996 997 // Page identifier. 998 string page_identifier = 2; 999 } 1000 1001 // Title of the document. 1002 string title = 1; 1003 1004 // Required. 1005 // [Document.name][google.cloud.discoveryengine.v1alpha.Document.name] of 1006 // the document. Full resource name of the referenced document, in the 1007 // format 1008 // `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`. 1009 string document = 2 [ 1010 (google.api.field_behavior) = REQUIRED, 1011 (google.api.resource_reference) = { 1012 type: "discoveryengine.googleapis.com/Document" 1013 } 1014 ]; 1015 1016 // Cloud Storage or HTTP uri for the document. 1017 string uri = 3; 1018 1019 // List of cited chunk contents derived from document content. 1020 repeated ChunkContent chunk_contents = 4; 1021 } 1022 1023 // Summary with metadata information. 1024 message SummaryWithMetadata { 1025 // Summary text with no citation information. 1026 string summary = 1; 1027 1028 // Citation metadata for given summary. 1029 CitationMetadata citation_metadata = 2; 1030 1031 // Document References. 1032 repeated Reference references = 3; 1033 } 1034 1035 // An Enum for summary-skipped reasons. 1036 enum SummarySkippedReason { 1037 // Default value. The summary skipped reason is not specified. 1038 SUMMARY_SKIPPED_REASON_UNSPECIFIED = 0; 1039 1040 // The adversarial query ignored case. 1041 // 1042 // Only populated when 1043 // [SummarySpec.ignore_adversarial_query][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SummarySpec.ignore_adversarial_query] 1044 // is set to `true`. 1045 ADVERSARIAL_QUERY_IGNORED = 1; 1046 1047 // The non-summary seeking query ignored case. 1048 // 1049 // Only populated when 1050 // [SummarySpec.ignore_non_summary_seeking_query][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SummarySpec.ignore_non_summary_seeking_query] 1051 // is set to `true`. 1052 NON_SUMMARY_SEEKING_QUERY_IGNORED = 2; 1053 1054 // The out-of-domain query ignored case. 1055 // 1056 // Google skips the summary if there are no high-relevance search results. 1057 // For example, the data store contains facts about company A but the 1058 // user query is asking questions about company B. 1059 OUT_OF_DOMAIN_QUERY_IGNORED = 3; 1060 1061 // The potential policy violation case. 1062 // 1063 // Google skips the summary if there is a potential policy violation 1064 // detected. This includes content that may be violent or toxic. 1065 POTENTIAL_POLICY_VIOLATION = 4; 1066 1067 // The LLM addon not enabled case. 1068 // 1069 // Google skips the summary if the LLM addon is not enabled. 1070 LLM_ADDON_NOT_ENABLED = 5; 1071 } 1072 1073 // The summary content. 1074 string summary_text = 1; 1075 1076 // Additional summary-skipped reasons. This provides the reason for ignored 1077 // cases. If nothing is skipped, this field is not set. 1078 repeated SummarySkippedReason summary_skipped_reasons = 2; 1079 1080 // A collection of Safety Attribute categories and their associated 1081 // confidence scores. 1082 SafetyAttributes safety_attributes = 3; 1083 1084 // Summary with metadata information. 1085 SummaryWithMetadata summary_with_metadata = 4; 1086 } 1087 1088 // Debug information specifically related to forward geocoding issues arising 1089 // from Geolocation Search. 1090 message GeoSearchDebugInfo { 1091 // The address from which forward geocoding ingestion produced issues. 1092 string original_address_query = 1; 1093 1094 // The error produced. 1095 string error_message = 2; 1096 } 1097 1098 // Information describing query expansion including whether expansion has 1099 // occurred. 1100 message QueryExpansionInfo { 1101 // Bool describing whether query expansion has occurred. 1102 bool expanded_query = 1; 1103 1104 // Number of pinned results. This field will only be set when expansion 1105 // happens and 1106 // [SearchRequest.QueryExpansionSpec.pin_unexpanded_results][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.pin_unexpanded_results] 1107 // is set to true. 1108 int64 pinned_result_count = 2; 1109 } 1110 1111 // A list of matched documents. The order represents the ranking. 1112 repeated SearchResult results = 1; 1113 1114 // Results of facets requested by user. 1115 repeated Facet facets = 2; 1116 1117 // Guided search result. 1118 GuidedSearchResult guided_search_result = 8; 1119 1120 // The estimated total count of matched items irrespective of pagination. The 1121 // count of 1122 // [results][google.cloud.discoveryengine.v1alpha.SearchResponse.results] 1123 // returned by pagination may be less than the 1124 // [total_size][google.cloud.discoveryengine.v1alpha.SearchResponse.total_size] 1125 // that matches. 1126 int32 total_size = 3; 1127 1128 // A unique search token. This should be included in the 1129 // [UserEvent][google.cloud.discoveryengine.v1alpha.UserEvent] logs resulting 1130 // from this search, which enables accurate attribution of search model 1131 // performance. 1132 string attribution_token = 4; 1133 1134 // The URI of a customer-defined redirect page. If redirect action is 1135 // triggered, no search is performed, and only 1136 // [redirect_uri][google.cloud.discoveryengine.v1alpha.SearchResponse.redirect_uri] 1137 // and 1138 // [attribution_token][google.cloud.discoveryengine.v1alpha.SearchResponse.attribution_token] 1139 // are set in the response. 1140 string redirect_uri = 12; 1141 1142 // A token that can be sent as 1143 // [SearchRequest.page_token][google.cloud.discoveryengine.v1alpha.SearchRequest.page_token] 1144 // to retrieve the next page. If this field is omitted, there are no 1145 // subsequent pages. 1146 string next_page_token = 5; 1147 1148 // Contains the spell corrected query, if found. If the spell correction type 1149 // is AUTOMATIC, then the search results are based on corrected_query. 1150 // Otherwise the original query is used for search. 1151 string corrected_query = 7; 1152 1153 // A summary as part of the search results. 1154 // This field is only returned if 1155 // [SearchRequest.ContentSearchSpec.summary_spec][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.summary_spec] 1156 // is set. 1157 Summary summary = 9; 1158 1159 // Controls applied as part of the Control service. 1160 repeated string applied_controls = 10; 1161 1162 repeated GeoSearchDebugInfo geo_search_debug_info = 16; 1163 1164 // Query expansion information for the returned results. 1165 QueryExpansionInfo query_expansion_info = 14; 1166} 1167