xref: /aosp_15_r20/external/googleapis/google/cloud/discoveryengine/v1alpha/search_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2022 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.discoveryengine.v1alpha;
18
19import "google/api/annotations.proto";
20import "google/api/client.proto";
21import "google/api/field_behavior.proto";
22import "google/api/resource.proto";
23import "google/cloud/discoveryengine/v1alpha/chunk.proto";
24import "google/cloud/discoveryengine/v1alpha/common.proto";
25import "google/cloud/discoveryengine/v1alpha/document.proto";
26import "google/protobuf/struct.proto";
27
28option csharp_namespace = "Google.Cloud.DiscoveryEngine.V1Alpha";
29option go_package = "cloud.google.com/go/discoveryengine/apiv1alpha/discoveryenginepb;discoveryenginepb";
30option java_multiple_files = true;
31option java_outer_classname = "SearchServiceProto";
32option java_package = "com.google.cloud.discoveryengine.v1alpha";
33option objc_class_prefix = "DISCOVERYENGINE";
34option php_namespace = "Google\\Cloud\\DiscoveryEngine\\V1alpha";
35option ruby_package = "Google::Cloud::DiscoveryEngine::V1alpha";
36
37// Service for search; exposes the Search RPC.
38service SearchService {
39  option (google.api.default_host) = "discoveryengine.googleapis.com";
40  option (google.api.oauth_scopes) =
41      "https://www.googleapis.com/auth/cloud-platform";
42
43  // Performs a search through a data store or engine serving config.
44  rpc Search(SearchRequest) returns (SearchResponse) {
45    option (google.api.http) = {
46      post: "/v1alpha/{serving_config=projects/*/locations/*/dataStores/*/servingConfigs/*}:search"
47      body: "*"
48      additional_bindings {
49        post: "/v1alpha/{serving_config=projects/*/locations/*/collections/*/dataStores/*/servingConfigs/*}:search"
50        body: "*"
51      }
52      additional_bindings {
53        post: "/v1alpha/{serving_config=projects/*/locations/*/collections/*/engines/*/servingConfigs/*}:search"
54        body: "*"
55      }
56    };
57  }
58}
59
60// Request message for
61// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
62// method.
63message SearchRequest {
64  // Specifies the image query input, carried by the `image` oneof.
65  message ImageQuery {
66    oneof image {
67      // Base64-encoded image bytes. Supported image formats: JPEG, PNG, and
68      // BMP.
69      string image_bytes = 1;
70    }
71  }
72
73  // Identifies a single data store to filter on in a search call.
74  message DataStoreSpec {
75    // Required. Full resource name of
76    // [DataStore][google.cloud.discoveryengine.v1alpha.DataStore], such as
77    // `projects/{project}/locations/{location}/collections/{collection_id}/dataStores/{data_store_id}`.
78    string data_store = 1 [
79      (google.api.field_behavior) = REQUIRED,
80      (google.api.resource_reference) = {
81        type: "discoveryengine.googleapis.com/DataStore"
82      }
83    ];
84  }
85
86  // A facet specification to perform faceted search.
87  message FacetSpec {
88    // Specifies how a facet is computed.
89    message FacetKey {
90      // Required. Supported textual and numerical facet keys in
91      // [Document][google.cloud.discoveryengine.v1alpha.Document] object, over
92      // which the facet values are computed. Facet key is case-sensitive.
93      string key = 1 [(google.api.field_behavior) = REQUIRED];
94
95      // Set only if values should be bucketed into intervals. Must be set
96      // for facets with numerical values. Must not be set for facet with text
97      // values. Maximum number of intervals is 30.
98      repeated Interval intervals = 2;
99
100      // Only get facet values for the given restricted values. For example,
101      // suppose "category" has three values "Action > 2022",
102      // "Action > 2021" and "Sci-Fi > 2022". If "restricted_values" is set
103      // to "Action > 2022", the "category" facet only contains
104      // "Action > 2022". Only supported on textual fields. Maximum is
105      // 10.
106      repeated string restricted_values = 3;
107
108      // Only get facet values that start with the given string prefix. For
109      // example, suppose "category" has three values "Action > 2022",
110      // "Action > 2021" and "Sci-Fi > 2022". If "prefixes" is set to "Action",
111      // the "category" facet only contains "Action > 2022" and
112      // "Action > 2021". Only supported on textual fields. Maximum is 10.
113      repeated string prefixes = 4;
114
115      // Only get facet values that contain the given strings. For example,
116      // suppose "category" has three values "Action > 2022",
117      // "Action > 2021" and "Sci-Fi > 2022". If "contains" is set to "2022",
118      // the "category" facet only contains "Action > 2022" and
119      // "Sci-Fi > 2022". Only supported on textual fields. Maximum is 10.
120      repeated string contains = 5;
121
122      // True to make facet keys case insensitive when getting faceting
123      // values with prefixes or contains; false otherwise.
124      bool case_insensitive = 6;
125
126      // The order in which facet values are returned.
127      //
128      // Allowed values are:
129      //
130      // * "count desc", which means order by
131      // [SearchResponse.Facet.values.count][google.cloud.discoveryengine.v1alpha.SearchResponse.Facet.FacetValue.count]
132      // descending.
133      //
134      // * "value desc", which means order by
135      // [SearchResponse.Facet.values.value][google.cloud.discoveryengine.v1alpha.SearchResponse.Facet.FacetValue.value]
136      // descending.
137      //   Only applies to textual facets.
138      //
139      // If not set, textual values are sorted in [natural
140      // order](https://en.wikipedia.org/wiki/Natural_sort_order); numerical
141      // intervals are sorted in the order given by
142      // [FacetSpec.FacetKey.intervals][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.intervals].
143      string order_by = 7;
144    }
145
146    // Required. The facet key specification.
147    FacetKey facet_key = 1 [(google.api.field_behavior) = REQUIRED];
148
149    // Maximum number of facet values that should be returned for this facet.
150    // If unspecified, defaults to 20. The maximum allowed value is 300. Values
151    // above 300 are coerced to 300.
152    //
153    // If this field is negative, an `INVALID_ARGUMENT` error is returned.
154    int32 limit = 2;
155
156    // List of keys to exclude when faceting.
157    //
158    //
159    // By default,
160    // [FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key]
161    // is not excluded from the filter unless it is listed in this field.
162    //
163    // Listing a facet key in this field allows its values to appear as facet
164    // results, even when they are filtered out of search results. Using this
165    // field does not affect what search results are returned.
166    //
167    // For example, suppose there are 100 documents with the color facet "Red"
168    // and 200 documents with the color facet "Blue". A query containing the
169    // filter "color:ANY("Red")" and having "color" as
170    // [FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key]
171    // would by default return only "Red" documents in the search results, and
172    // also return "Red" with count 100 as the only color facet. Although there
173    // are also blue documents available, "Blue" would not be shown as an
174    // available facet value.
175    //
176    // If "color" is listed in "excludedFilterKeys", then the query returns the
177    // facet values "Red" with count 100 and "Blue" with count 200, because the
178    // "color" key is now excluded from the filter. Because this field doesn't
179    // affect search results, the search results are still correctly filtered to
180    // return only "Red" documents.
181    //
182    // A maximum of 100 values are allowed. Otherwise, an `INVALID_ARGUMENT`
183    // error is returned.
184    repeated string excluded_filter_keys = 3;
185
186    // Enables dynamic position for this facet. If set to true, the position of
187    // this facet among all facets in the response is determined automatically.
188    // If dynamic facets are enabled, it is ordered together with them.
189    // If set to false, the position of this facet in the
190    // response is the same as in the request, and it is ranked before
191    // the facets with dynamic position enabled and all dynamic facets.
192    //
193    // For example, you may always want to have the rating facet returned in
194    // the response, but it's not necessary to always display the rating facet
195    // at the top. In that case, you can set enable_dynamic_position to true so
196    // that the position of rating facet in response is determined
197    // automatically.
198    //
199    // Another example, assuming you have the following facets in the request:
200    //
201    // * "rating", enable_dynamic_position = true
202    //
203    // * "price", enable_dynamic_position = false
204    //
205    // * "brands", enable_dynamic_position = false
206    //
207    // And also you have dynamic facets enabled, which generates a facet
208    // `gender`. Then the final order of the facets in the response can be
209    // ("price", "brands", "rating", "gender") or ("price", "brands", "gender",
210    // "rating") depending on how the API orders "gender" and "rating" facets.
211    // However, notice that "price" and "brands" are always
212    // ranked at first and second position because their enable_dynamic_position
213    // is false.
214    bool enable_dynamic_position = 4;
215  }
216
217  // Boost specification to boost certain documents.
218  message BoostSpec {
219    // Boost applies to documents which match a condition.
220    message ConditionBoostSpec {
221      // Specification for custom ranking based on customer specified attribute
222      // value. It provides more controls for customized ranking than the simple
223      // (condition, boost) combination above.
224      message BoostControlSpec {
225        // The control points used to define the curve. The curve defined
226        // through these control points can only be monotonically increasing
227        // or decreasing (constant values are acceptable).
228        message ControlPoint {
229          // Can be one of:
230          // 1. The numerical field value.
231          // 2. The duration spec for freshness:
232          // The value must be formatted as an XSD `dayTimeDuration` value (a
233          // restricted subset of an ISO 8601 duration value). The pattern for
234          // this is: `[nD][T[nH][nM][nS]]`.
235          string attribute_value = 1;
236
237          // The value between -1 to 1 by which to boost the score if the
238          // attribute_value evaluates to the value specified above.
239          float boost_amount = 2;
240        }
241
242        // The attribute (or function) for which the custom ranking is to be
243        // applied.
244        enum AttributeType {
245          // Unspecified AttributeType.
246          ATTRIBUTE_TYPE_UNSPECIFIED = 0;
247
248          // The value of the numerical field will be used to dynamically update
249          // the boost amount. In this case, the attribute_value (the x value)
250          // of the control point will be the actual value of the numerical
251          // field for which the boost_amount is specified.
252          NUMERICAL = 1;
253
254          // For the freshness use case the attribute value will be the duration
255          // between the current time and the date in the datetime field
256          // specified. The value must be formatted as an XSD `dayTimeDuration`
257          // value (a restricted subset of an ISO 8601 duration value). The
258          // pattern for this is: `[nD][T[nH][nM][nS]]`.
259          // E.g. `5D`, `3DT12H30M`, `T24H`.
260          FRESHNESS = 2;
261        }
262
263        // The interpolation type to be applied. Default will be linear
264        // (Piecewise Linear).
265        enum InterpolationType {
266          // Interpolation type is unspecified. In this case, it defaults to
267          // Linear.
268          INTERPOLATION_TYPE_UNSPECIFIED = 0;
269
270          // Piecewise linear interpolation will be applied.
271          LINEAR = 1;
272        }
273
274        // The name of the field whose value will be used to determine the
275        // boost amount.
276        string field_name = 1;
277
278        // The attribute type to be used to determine the boost amount. The
279        // attribute value can be derived from the field value of the specified
280        // field_name. In the case of numerical it is straightforward i.e.
281        // attribute_value = numerical_field_value. In the case of freshness
282        // however, attribute_value = (time.now() - datetime_field_value).
283        AttributeType attribute_type = 2;
284
285        // The interpolation type to be applied to connect the control points
286        // listed below.
287        InterpolationType interpolation_type = 3;
288
289        // The control points used to define the curve. The monotonic function
290        // (defined through the interpolation_type above) passes through the
291        // control points listed here.
292        repeated ControlPoint control_points = 4;
293      }
294
295      // An expression which specifies a boost condition. The syntax and
296      // supported fields are the same as a filter expression. See
297      // [SearchRequest.filter][google.cloud.discoveryengine.v1alpha.SearchRequest.filter]
298      // for detail syntax and limitations.
299      //
300      // Examples:
301      //
302      // * To boost documents with document ID "doc_1" or "doc_2", and
303      // color "Red" or "Blue":
304      // `(document_id: ANY("doc_1", "doc_2")) AND (color: ANY("Red", "Blue"))`
305      string condition = 1;
306
307      // Strength of the condition boost, which should be in [-1, 1]. Negative
308      // boost means demotion. Default is 0.0.
309      //
310      // Setting to 1.0 gives the document a big promotion. However, it does
311      // not necessarily mean that the boosted document will be the top result
312      // at all times, nor that other documents will be excluded. Results
313      // could still be shown even when none of them matches the condition.
314      // And results that are significantly more relevant to the search query
315      // can still trump your heavily favored but irrelevant documents.
316      //
317      // Setting to -1.0 gives the document a big demotion. However, results
318      // that are deeply relevant might still be shown. The document will have
319      // an uphill battle to get a fairly high ranking, but it is not
320      // blocked out completely.
321      //
322      // Setting to 0.0 means no boost applied. The boosting condition is
323      // ignored. Only one of the (condition, boost) combination or the
324      // boost_control_spec below is set. If both are set then the global boost
325      // is ignored and the more fine-grained boost_control_spec is applied.
326      float boost = 2;
327
328      // Complex specification for custom ranking based on customer defined
329      // attribute value.
330      BoostControlSpec boost_control_spec = 3;
331    }
332
333    // Condition boost specifications. If a document matches multiple conditions
334    // in the specifications, boost scores from these specifications are all
335    // applied and combined in a non-linear way. Maximum number of
336    // specifications is 20.
337    repeated ConditionBoostSpec condition_boost_specs = 1;
338  }
339
340  // Specification to determine under which conditions query expansion should
341  // occur.
342  message QueryExpansionSpec {
343    // Enum describing under which condition query expansion should occur.
344    enum Condition {
345      // Unspecified query expansion condition. In this case, server behavior
346      // defaults to
347      // [Condition.DISABLED][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.Condition.DISABLED].
348      CONDITION_UNSPECIFIED = 0;
349
350      // Disabled query expansion. Only the exact search query is used, even if
351      // [SearchResponse.total_size][google.cloud.discoveryengine.v1alpha.SearchResponse.total_size]
352      // is zero.
353      DISABLED = 1;
354
355      // Automatic query expansion built by the Search API.
356      AUTO = 2;
357    }
358
359    // The condition under which query expansion should occur. Defaults to
360    // [Condition.DISABLED][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.Condition.DISABLED].
361    Condition condition = 1;
362
363    // Whether to pin unexpanded results. If this field is set to true,
364    // unexpanded results are always at the top of the search results, followed
365    // by the expanded results.
366    bool pin_unexpanded_results = 2;
367  }
368
369  // The specification for query spell correction.
370  message SpellCorrectionSpec {
371    // Enum describing under which mode spell correction should occur.
372    enum Mode {
373      // Unspecified spell correction mode. In this case, server behavior
374      // defaults to
375      // [Mode.AUTO][google.cloud.discoveryengine.v1alpha.SearchRequest.SpellCorrectionSpec.Mode.AUTO].
376      MODE_UNSPECIFIED = 0;
377
378      // Search API will try to find a spell suggestion if there
379      // is any and put it in the
380      // [SearchResponse.corrected_query][google.cloud.discoveryengine.v1alpha.SearchResponse.corrected_query].
381      // The spell suggestion will not be used as the search query.
382      SUGGESTION_ONLY = 1;
383
384      // Automatic spell correction built by the Search API. Search will
385      // be based on the corrected query if found.
386      AUTO = 2;
387    }
388
389    // The mode under which spell correction should take effect to
390    // replace the original search query. Defaults to
391    // [Mode.AUTO][google.cloud.discoveryengine.v1alpha.SearchRequest.SpellCorrectionSpec.Mode.AUTO].
392    Mode mode = 1;
393  }
394
395  // A specification for configuring the behavior of content search.
396  message ContentSearchSpec {
397    // A specification for configuring snippets in a search response.
398    message SnippetSpec {
399      // [DEPRECATED] This field is deprecated. To control snippet return, use
400      // `return_snippet` field. For backwards compatibility, we will return
401      // snippet if max_snippet_count > 0.
402      int32 max_snippet_count = 1 [deprecated = true];
403
404      // [DEPRECATED] This field is deprecated and will have no effect on the
405      // snippet.
406      bool reference_only = 2 [deprecated = true];
407
408      // If `true`, then return snippet. If no snippet can be generated, we
409      // return "No snippet is available for this page." A `snippet_status` with
410      // `SUCCESS` or `NO_SNIPPET_AVAILABLE` will also be returned.
411      bool return_snippet = 3;
412    }
413
414    // A specification for configuring a summary returned in a search
415    // response.
416    message SummarySpec {
417      // Specification of the prompt to use with the model.
418      message ModelPromptSpec {
419        // Text at the beginning of the prompt that instructs the assistant.
420        // Examples are available in the user guide.
421        string preamble = 1;
422      }
423
424      // Specification of the model.
425      message ModelSpec {
426        // The model version used to generate the summary.
427        //
428        // Supported values are:
429        //
430        // * `stable`: string. Default value when no value is specified. Uses a
431        //    generally available, fine-tuned model. For more information, see
432        //    [Answer generation model versions and
433        //    lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models).
434        // * `preview`: string. (Public preview) Uses a preview model. For more
435        //    information, see
436        //    [Answer generation model versions and
437        //    lifecycle](https://cloud.google.com/generative-ai-app-builder/docs/answer-generation-models).
438        string version = 1;
439      }
440
441      // The number of top results to generate the summary from. If the number
442      // of results returned is less than `summaryResultCount`, the summary is
443      // generated from all of the results.
444      //
445      // At most 10 results for documents mode, or 50 for chunks mode, can be
446      // used to generate a summary. The chunks mode is used when
447      // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
448      // is set to
449      // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
450      int32 summary_result_count = 1;
451
452      // Specifies whether to include citations in the summary. The default
453      // value is `false`.
454      //
455      // When this field is set to `true`, summaries include in-line citation
456      // numbers.
457      //
458      // Example summary including citations:
459      //
460      // BigQuery is Google Cloud's fully managed and completely serverless
461      // enterprise data warehouse [1]. BigQuery supports all data types, works
462      // across clouds, and has built-in machine learning and business
463      // intelligence, all within a unified platform [2, 3].
464      //
465      // The citation numbers refer to the returned search results and are
466      // 1-indexed. For example, [1] means that the sentence is attributed to
467      // the first search result. [2, 3] means that the sentence is attributed
468      // to both the second and third search results.
469      bool include_citations = 2;
470
471      // Specifies whether to filter out adversarial queries. The default value
472      // is `false`.
473      //
474      // Google employs search-query classification to detect adversarial
475      // queries. No summary is returned if the search query is classified as an
476      // adversarial query. For example, a user might ask a question regarding
477      // negative comments about the company or submit a query designed to
478      // generate unsafe, policy-violating output. If this field is set to
479      // `true`, we skip generating summaries for adversarial queries and return
480      // fallback messages instead.
481      bool ignore_adversarial_query = 3;
482
483      // Specifies whether to filter out queries that are not summary-seeking.
484      // The default value is `false`.
485      //
486      // Google employs search-query classification to detect summary-seeking
487      // queries. No summary is returned if the search query is classified as a
488      // non-summary seeking query. For example, `why is the sky blue` and `Who
489      // is the best soccer player in the world?` are summary-seeking queries,
490      // but `SFO airport` and `world cup 2026` are not. They are most likely
491      // navigational queries. If this field is set to `true`, we skip
492      // generating summaries for non-summary seeking queries and return
493      // fallback messages instead.
494      bool ignore_non_summary_seeking_query = 4;
495
496      // If specified, the spec will be used to modify the prompt provided to
497      // the LLM.
498      ModelPromptSpec model_prompt_spec = 5;
499
500      // Language code for Summary. Use language tags defined by
501      // [BCP47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt).
502      // Note: This is an experimental feature.
503      string language_code = 6;
504
505      // If specified, the spec will be used to modify the model specification
506      // provided to the LLM.
507      ModelSpec model_spec = 7;
508
509      // If true, answer will be generated from most relevant chunks from top
510      // search results. This feature will improve summary quality.
511      // Note that with this feature enabled, not all top search results
512      // will be referenced and included in the reference list, so the citation
513      // source index only points to the search results listed in the reference
514      // list.
515      bool use_semantic_chunks = 8;
516    }
517
518    // A specification for configuring the extractive content in a search
519    // response.
520    message ExtractiveContentSpec {
521      // The maximum number of extractive answers returned in each search
522      // result.
523      //
524      // An extractive answer is a verbatim answer extracted from the original
525      // document, which provides a precise and contextually relevant answer to
526      // the search query.
527      //
528      // If the number of matching answers is less than the
529      // `max_extractive_answer_count`, return all of the answers. Otherwise,
530      // return the `max_extractive_answer_count`.
531      //
532      // At most five answers are returned for each
533      // [SearchResult][google.cloud.discoveryengine.v1alpha.SearchResponse.SearchResult].
534      int32 max_extractive_answer_count = 1;
535
536      // The max number of extractive segments returned in each search result.
537      // Only applied if the
538      // [DataStore][google.cloud.discoveryengine.v1alpha.DataStore] is set to
539      // [DataStore.ContentConfig.CONTENT_REQUIRED][google.cloud.discoveryengine.v1alpha.DataStore.ContentConfig.CONTENT_REQUIRED]
540      // or
541      // [DataStore.solution_types][google.cloud.discoveryengine.v1alpha.DataStore.solution_types]
542      // is
543      // [SOLUTION_TYPE_CHAT][google.cloud.discoveryengine.v1alpha.SolutionType.SOLUTION_TYPE_CHAT].
544      //
545      // An extractive segment is a text segment extracted from the original
546      // document that is relevant to the search query, and, in general, more
547      // verbose than an extractive answer. The segment could then be used as
548      // input for LLMs to generate summaries and answers.
549      //
550      // If the number of matching segments is less than
551      // `max_extractive_segment_count`, return all of the segments. Otherwise,
552      // return the `max_extractive_segment_count`.
553      int32 max_extractive_segment_count = 2;
554
555      // Specifies whether to return the confidence score from the extractive
556      // segments in each search result. This feature is available only for new
557      // or allowlisted data stores. To allowlist your data store,
558      // contact your Customer Engineer. The default value is `false`.
559      bool return_extractive_segment_score = 3;
560
561      // Specifies whether to also include the segments adjacent to each
562      // selected segment.
563      // Return at most `num_previous_segments` segments before each selected
564      // segment.
565      int32 num_previous_segments = 4;
566
567      // Return at most `num_next_segments` segments after each selected
568      // segment.
569      int32 num_next_segments = 5;
570    }
571
572    // Specifies the chunk spec to be returned from the search response.
573    // Only available if the
574    // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
575    // is set to
576    // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
577    message ChunkSpec {
578      // The number of previous chunks to be returned of the current chunk. The
579      // maximum allowed value is 3.
580      // If not specified, no previous chunks will be returned.
581      int32 num_previous_chunks = 1;
582
583      // The number of next chunks to be returned of the current chunk. The
584      // maximum allowed value is 3.
585      // If not specified, no next chunks will be returned.
586      int32 num_next_chunks = 2;
587    }
588
589    // Specifies the search result mode. If unspecified, the
590    // search result mode is based on
591    // [DataStore.DocumentProcessingConfig.chunking_config][]:
592    // * If [DataStore.DocumentProcessingConfig.chunking_config][] is specified,
593    //   it defaults to `CHUNKS`.
594    // * Otherwise, it defaults to `DOCUMENTS`.
595    enum SearchResultMode {
596      // Default value.
597      SEARCH_RESULT_MODE_UNSPECIFIED = 0;
598
599      // Returns documents in the search result.
600      DOCUMENTS = 1;
601
602      // Returns chunks in the search result. Only available if the
603      // [DataStore.DocumentProcessingConfig.chunking_config][] is specified.
604      CHUNKS = 2;
605    }
606
607    // If `snippetSpec` is not specified, snippets are not included in the
608    // search response.
609    SnippetSpec snippet_spec = 1;
610
611    // If `summarySpec` is not specified, summaries are not included in the
612    // search response.
613    SummarySpec summary_spec = 2;
614
615    // If there is no extractive_content_spec provided, there will be no
616    // extractive answer in the search response.
617    ExtractiveContentSpec extractive_content_spec = 3;
618
619    // Specifies the search result mode. If unspecified, the
620    // search result mode is based on
621    // [DataStore.DocumentProcessingConfig.chunking_config][]:
622    // * If [DataStore.DocumentProcessingConfig.chunking_config][] is specified,
623    //   it defaults to `CHUNKS`.
624    // * Otherwise, it defaults to `DOCUMENTS`.
625    SearchResultMode search_result_mode = 4;
626
627    // Specifies the chunk spec to be returned from the search response.
628    // Only available if the
629    // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
630    // is set to
631    // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
632    ChunkSpec chunk_spec = 5;
633  }
634
635  // The specification that uses customized query embedding vector to do
636  // semantic document retrieval.
637  message EmbeddingSpec {
638    // Embedding vector.
639    message EmbeddingVector {
640      // Embedding field path in schema.
641      string field_path = 1;
642
643      // Query embedding vector.
644      repeated float vector = 2;
645    }
646
647    // The embedding vector used for retrieval. Limited to 1.
648    repeated EmbeddingVector embedding_vectors = 1;
649  }
650
651  // Required. The resource name of the Search serving config, such as
652  // `projects/*/locations/global/collections/default_collection/engines/*/servingConfigs/default_serving_config`,
653  // or
654  // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store/servingConfigs/default_serving_config`.
655  // This field is used to identify the serving configuration name and the
656  // set of models used to make the search.
657  string serving_config = 1 [
658    (google.api.field_behavior) = REQUIRED,
659    (google.api.resource_reference) = {
660      type: "discoveryengine.googleapis.com/ServingConfig"
661    }
662  ];
663
664  // The branch resource name, such as
665  // `projects/*/locations/global/collections/default_collection/dataStores/default_data_store/branches/0`.
666  //
667  // Use `default_branch` as the branch ID or leave this field empty, to search
668  // documents under the default branch.
669  string branch = 2 [(google.api.resource_reference) = {
670    type: "discoveryengine.googleapis.com/Branch"
671  }];
672
673  // Raw search query.
674  string query = 3;
675
676  // Raw image query.
677  ImageQuery image_query = 19;
678
679  // Maximum number of
680  // [Document][google.cloud.discoveryengine.v1alpha.Document]s to return. The
681  // maximum allowed value depends on the data type. Values above the maximum
682  // value are coerced to the maximum value.
683  //
684  // * Websites with basic indexing: Default `10`, Maximum `25`.
685  // * Websites with advanced indexing: Default `25`, Maximum `50`.
686  // * Other: Default `50`, Maximum `100`.
687  //
688  // If this field is negative, an `INVALID_ARGUMENT` error is returned.
689  int32 page_size = 4;
690
691  // A page token received from a previous
692  // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
693  // call. Provide this to retrieve the subsequent page.
694  //
695  // When paginating, all other parameters provided to
696  // [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
697  // must match the call that provided the page token. Otherwise, an
698  // `INVALID_ARGUMENT` error is returned.
699  string page_token = 5;
700
701  // A 0-indexed integer that specifies the current offset (that is, starting
702  // result location, amongst the
703  // [Document][google.cloud.discoveryengine.v1alpha.Document]s deemed by the
704  // API as relevant) in search results. This field is only considered if
705  // [page_token][google.cloud.discoveryengine.v1alpha.SearchRequest.page_token]
706  // is unset.
707  //
708  // If this field is negative, an `INVALID_ARGUMENT` error is returned.
709  int32 offset = 6;
710
711  // A list of data store specs to apply on a search call.
712  repeated DataStoreSpec data_store_specs = 32;
713
714  // The filter syntax consists of an expression language for constructing a
715  // predicate from one or more fields of the documents being filtered. Filter
716  // expression is case-sensitive.
717  //
718  // If this field is unrecognizable, an `INVALID_ARGUMENT` error is returned.
719  //
720  // Filtering in Vertex AI Search is done by mapping the LHS filter key to a
721  // key property defined in the Vertex AI Search backend -- this mapping is
722  // defined by the customer in their schema. For example, a media customer
723  // might have a field 'name' in their schema. In this case, the filter would
724  // look like this: filter --> name:'ANY("king kong")'
725  //
726  // For more information about filtering including syntax and filter
727  // operators, see
728  // [Filter](https://cloud.google.com/generative-ai-app-builder/docs/filter-search-metadata).
729  string filter = 7;
730
731  // The default filter that is applied when a user performs a search without
732  // checking any filters on the search page.
733  //
734  // The filter applied to every search request when quality improvement such as
735  // query expansion is needed. In the case a query does not have a sufficient
736  // amount of results, this filter will be used to determine whether or not to
737  // enable the query expansion flow. The original filter will still be used for
738  // the query expanded search.
739  // This field is strongly recommended to achieve high search quality.
740  //
741  // For more information about filter syntax, see
742  // [SearchRequest.filter][google.cloud.discoveryengine.v1alpha.SearchRequest.filter].
743  string canonical_filter = 29;
744
745  // The order in which documents are returned. Documents can be ordered by
746  // a field in a [Document][google.cloud.discoveryengine.v1alpha.Document]
747  // object. Leave it unset if ordered by relevance. `order_by` expression is
748  // case-sensitive. For more information on ordering, see
749  // [Ordering](https://cloud.google.com/retail/docs/filter-and-order#order).
750  //
751  // If this field is unrecognizable, an `INVALID_ARGUMENT` error is returned.
752  string order_by = 8;
753
754  // Information about the end user.
755  // Highly recommended for analytics.
756  // [UserInfo.user_agent][google.cloud.discoveryengine.v1alpha.UserInfo.user_agent]
757  // is used to deduce `device_type` for analytics.
758  UserInfo user_info = 21;
759
760  // Facet specifications for faceted search. If empty, no facets are returned.
761  //
762  // A maximum of 100 values are allowed. Otherwise, an `INVALID_ARGUMENT`
763  // error is returned.
764  repeated FacetSpec facet_specs = 9;
765
766  // Boost specification to boost certain documents.
767  // For more information on boosting, see
768  // [Boosting](https://cloud.google.com/retail/docs/boosting#boost).
769  BoostSpec boost_spec = 10;
770
771  // Additional search parameters.
772  //
773  // For public website search only, supported values are:
774  //
775  // * `user_country_code`: string. Default empty. If set to non-empty, results
776  //    are restricted or boosted based on the location provided.
777  //    Example:
778  //    user_country_code: "au"
779  //
780  //    For available codes, see [Country
781  //    Codes](https://developers.google.com/custom-search/docs/json_api_reference#countryCodes).
782  //
783  // * `search_type`: double. Default empty. Enables non-webpage searching
784  //    depending on the value. The only valid non-default value is 1,
785  //    which enables image searching.
786  //    Example:
787  //    search_type: 1
788  map<string, google.protobuf.Value> params = 11;
789
790  // The query expansion specification that specifies the conditions under which
791  // query expansion occurs.
792  QueryExpansionSpec query_expansion_spec = 13;
793
794  // The spell correction specification that specifies the mode under
795  // which spell correction takes effect.
796  SpellCorrectionSpec spell_correction_spec = 14;
797
798  // A unique identifier for tracking visitors. For example, this could be
799  // implemented with an HTTP cookie, which should be able to uniquely identify
800  // a visitor on a single device. This unique identifier should not change if
801  // the visitor logs in or out of the website.
802  //
803  // This field should NOT have a fixed value such as `unknown_visitor`.
804  //
805  // This should be the same identifier as
806  // [UserEvent.user_pseudo_id][google.cloud.discoveryengine.v1alpha.UserEvent.user_pseudo_id]
807  // and
808  // [CompleteQueryRequest.user_pseudo_id][google.cloud.discoveryengine.v1alpha.CompleteQueryRequest.user_pseudo_id].
809  //
810  // The field must be a UTF-8 encoded string with a length limit of 128
811  // characters. Otherwise, an `INVALID_ARGUMENT` error is returned.
812  string user_pseudo_id = 15;
813
814  // A specification for configuring the behavior of content search.
815  ContentSearchSpec content_search_spec = 24;
816
817  // Uses the provided embedding to do additional semantic document retrieval.
818  // The retrieval is based on the dot product of
819  // [SearchRequest.EmbeddingSpec.EmbeddingVector.vector][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.vector]
820  // and the document embedding that is provided in
821  // [SearchRequest.EmbeddingSpec.EmbeddingVector.field_path][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.field_path].
822  //
823  // If
824  // [SearchRequest.EmbeddingSpec.EmbeddingVector.field_path][google.cloud.discoveryengine.v1alpha.SearchRequest.EmbeddingSpec.EmbeddingVector.field_path]
825  // is not provided, it will use
826  // [ServingConfig.EmbeddingConfig.field_path][google.cloud.discoveryengine.v1alpha.ServingConfig.embedding_config].
827  EmbeddingSpec embedding_spec = 23;
828
829  // The ranking expression controls the customized ranking on retrieval
830  // documents. This overrides
831  // [ServingConfig.ranking_expression][google.cloud.discoveryengine.v1alpha.ServingConfig.ranking_expression].
832  // The ranking expression is a single function or multiple functions that are
833  // joined by "+".
834  //   * ranking_expression = function, { " + ", function };
835  // Supported functions:
836  //   * double * relevance_score
837  //   * double * dotProduct(embedding_field_path)
838  // Function variables:
839  //   `relevance_score`: pre-defined keyword, used to measure relevance
840  //   between query and document.
841  //   `embedding_field_path`: the document embedding field
842  //   used with query embedding vector.
843  //   `dotProduct`: embedding function between embedding_field_path and query
844  //   embedding vector.
845  //
846  //  Example ranking expression:
847  //    If document has an embedding field doc_embedding, the ranking expression
848  //    could be `0.5 * relevance_score + 0.3 * dotProduct(doc_embedding)`.
849  string ranking_expression = 26;
850
851  // Whether to turn on safe search. This is only supported for
852  // website search.
853  bool safe_search = 20;
854
855  // The user labels applied to a resource must meet the following requirements:
856  //
857  // * Each resource can have multiple labels, up to a maximum of 64.
858  // * Each label must be a key-value pair.
859  // * Keys have a minimum length of 1 character and a maximum length of 63
860  //   characters and cannot be empty. Values can be empty and have a maximum
861  //   length of 63 characters.
862  // * Keys and values can contain only lowercase letters, numeric characters,
863  //   underscores, and dashes. All characters must use UTF-8 encoding, and
864  //   international characters are allowed.
865  // * The key portion of a label must be unique. However, you can use the same
866  //   key with multiple resources.
867  // * Keys must start with a lowercase letter or international character.
868  //
869  // See [Google Cloud
870  // Document](https://cloud.google.com/resource-manager/docs/creating-managing-labels#requirements)
871  // for more details.
872  map<string, string> user_labels = 22;
873
874  // Custom fine tuning configs.
875  CustomFineTuningSpec custom_fine_tuning_spec = 34;
876}
877
878// Response message for
879// [SearchService.Search][google.cloud.discoveryengine.v1alpha.SearchService.Search]
880// method.
881message SearchResponse {
882  // Represents the search results.
883  message SearchResult {
884    // [Document.id][google.cloud.discoveryengine.v1alpha.Document.id] of the
885    // searched [Document][google.cloud.discoveryengine.v1alpha.Document].
886    string id = 1;
887
888    // The document data snippet in the search response. Only fields that are
889    // marked as retrievable are populated.
890    Document document = 2;
891
892    // The chunk data in the search response if the
893    // [SearchRequest.ContentSearchSpec.search_result_mode][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.search_result_mode]
894    // is set to
895    // [CHUNKS][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SearchResultMode.CHUNKS].
896    Chunk chunk = 18;
897
898    // Google provided available scores.
899    map<string, DoubleList> model_scores = 4;
900  }
901
902  // A facet result.
903  message Facet {
904    // A facet value which contains value names and their count.
905    message FacetValue {
906      // A facet value which contains values.
907      oneof facet_value {
908        // Text value of a facet, such as "Black" for facet "colors".
909        string value = 1;
910
911        // Interval value for a facet, such as [10, 20) for facet "price". It
912        // matches
913        // [SearchRequest.FacetSpec.FacetKey.intervals][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.intervals].
914        Interval interval = 2;
915      }
916
917      // Number of items that have this facet value.
918      int64 count = 3;
919    }
920
921    // The key for this facet. E.g., "colors" or "price". It matches
922    // [SearchRequest.FacetSpec.FacetKey.key][google.cloud.discoveryengine.v1alpha.SearchRequest.FacetSpec.FacetKey.key].
923    string key = 1;
924
925    // The facet values for this field.
926    repeated FacetValue values = 2;
927
928    // Whether the facet is dynamically generated.
929    bool dynamic_facet = 3;
930  }
931
932  // Guided search result. The guided search helps the user refine the search
933  // results and narrow down to the real needs from a broader set of results.
934  message GuidedSearchResult {
935    // Useful attribute for search result refinements.
936    message RefinementAttribute {
937      // Attribute key used to refine the results, e.g. 'movie_type'.
938      string attribute_key = 1;
939
940      // Attribute value used to refine the results, e.g. 'drama'.
941      string attribute_value = 2;
942    }
943
944    // A list of ranked refinement attributes.
945    repeated RefinementAttribute refinement_attributes = 1;
946
947    // Suggested follow-up questions.
948    repeated string follow_up_questions = 2;
949  }
950
951  // Summary of the top N search results specified by the summary spec.
952  message Summary {
953    // Safety Attribute categories and their associated confidence scores.
954    message SafetyAttributes {
955      // The display names of Safety Attribute categories associated with the
956      // generated content. Order matches the Scores.
957      repeated string categories = 1;
958
959      // The confidence scores of each category. A higher
960      // value means higher confidence. Order matches the Categories.
961      repeated float scores = 2;
962    }
963
964    // Citation metadata.
965    message CitationMetadata {
966      // Citations for segments.
967      repeated Citation citations = 1;
968    }
969
970    // Citation info for a segment.
971    message Citation {
972      // Index indicates the start of the segment, measured in bytes/unicode.
973      int64 start_index = 1;
974
975      // End of the attributed segment, exclusive.
976      int64 end_index = 2;
977
978      // Citation sources for the attributed segment.
979      repeated CitationSource sources = 3;
980    }
981
982    // Citation source.
983    message CitationSource {
984      // Document reference index from SummaryWithMetadata.references.
985      // It is 0-indexed and the value will be zero if the reference_index is
986      // not set explicitly.
987      int64 reference_index = 4;
988    }
989
990    // Document reference.
991    message Reference {
992      // Chunk content.
993      message ChunkContent {
994        // Chunk textual content.
995        string content = 1;
996
997        // Page identifier.
998        string page_identifier = 2;
999      }
1000
1001      // Title of the document.
1002      string title = 1;
1003
1004      // Required.
1005      // [Document.name][google.cloud.discoveryengine.v1alpha.Document.name] of
1006      // the document. Full resource name of the referenced document, in the
1007      // format
1008      // `projects/*/locations/*/collections/*/dataStores/*/branches/*/documents/*`.
1009      string document = 2 [
1010        (google.api.field_behavior) = REQUIRED,
1011        (google.api.resource_reference) = {
1012          type: "discoveryengine.googleapis.com/Document"
1013        }
1014      ];
1015
1016      // Cloud Storage or HTTP URI for the document.
1017      string uri = 3;
1018
1019      // List of cited chunk contents derived from document content.
1020      repeated ChunkContent chunk_contents = 4;
1021    }
1022
1023    // Summary with metadata information.
1024    message SummaryWithMetadata {
1025      // Summary text with no citation information.
1026      string summary = 1;
1027
1028      // Citation metadata for given summary.
1029      CitationMetadata citation_metadata = 2;
1030
1031      // Document References.
1032      repeated Reference references = 3;
1033    }
1034
1035    // An Enum for summary-skipped reasons.
1036    enum SummarySkippedReason {
1037      // Default value. The summary skipped reason is not specified.
1038      SUMMARY_SKIPPED_REASON_UNSPECIFIED = 0;
1039
1040      // The adversarial query ignored case.
1041      //
1042      // Only populated when
1043      // [SummarySpec.ignore_adversarial_query][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SummarySpec.ignore_adversarial_query]
1044      // is set to `true`.
1045      ADVERSARIAL_QUERY_IGNORED = 1;
1046
1047      // The non-summary seeking query ignored case.
1048      //
1049      // Only populated when
1050      // [SummarySpec.ignore_non_summary_seeking_query][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.SummarySpec.ignore_non_summary_seeking_query]
1051      // is set to `true`.
1052      NON_SUMMARY_SEEKING_QUERY_IGNORED = 2;
1053
1054      // The out-of-domain query ignored case.
1055      //
1056      // Google skips the summary if there are no high-relevance search results.
1057      // For example, the data store contains facts about company A but the
1058      // user query is asking questions about company B.
1059      OUT_OF_DOMAIN_QUERY_IGNORED = 3;
1060
1061      // The potential policy violation case.
1062      //
1063      // Google skips the summary if there is a potential policy violation
1064      // detected. This includes content that may be violent or toxic.
1065      POTENTIAL_POLICY_VIOLATION = 4;
1066
1067      // The LLM addon not enabled case.
1068      //
1069      // Google skips the summary if the LLM addon is not enabled.
1070      LLM_ADDON_NOT_ENABLED = 5;
1071    }
1072
1073    // The summary content.
1074    string summary_text = 1;
1075
1076    // Additional summary-skipped reasons. This provides the reason for ignored
1077    // cases. If nothing is skipped, this field is not set.
1078    repeated SummarySkippedReason summary_skipped_reasons = 2;
1079
1080    // A collection of Safety Attribute categories and their associated
1081    // confidence scores.
1082    SafetyAttributes safety_attributes = 3;
1083
1084    // Summary with metadata information.
1085    SummaryWithMetadata summary_with_metadata = 4;
1086  }
1087
1088  // Debug information specifically related to forward geocoding issues arising
1089  // from Geolocation Search.
1090  message GeoSearchDebugInfo {
1091    // The address from which forward geocoding ingestion produced issues.
1092    string original_address_query = 1;
1093
1094    // The error produced.
1095    string error_message = 2;
1096  }
1097
1098  // Information describing query expansion including whether expansion has
1099  // occurred.
1100  message QueryExpansionInfo {
1101    // Bool describing whether query expansion has occurred.
1102    bool expanded_query = 1;
1103
1104    // Number of pinned results. This field will only be set when expansion
1105    // happens and
1106    // [SearchRequest.QueryExpansionSpec.pin_unexpanded_results][google.cloud.discoveryengine.v1alpha.SearchRequest.QueryExpansionSpec.pin_unexpanded_results]
1107    // is set to true.
1108    int64 pinned_result_count = 2;
1109  }
1110
1111  // A list of matched documents. The order represents the ranking.
1112  repeated SearchResult results = 1;
1113
1114  // Results of facets requested by user.
1115  repeated Facet facets = 2;
1116
1117  // Guided search result.
1118  GuidedSearchResult guided_search_result = 8;
1119
1120  // The estimated total count of matched items irrespective of pagination. The
1121  // count of
1122  // [results][google.cloud.discoveryengine.v1alpha.SearchResponse.results]
1123  // returned by pagination may be less than the
1124  // [total_size][google.cloud.discoveryengine.v1alpha.SearchResponse.total_size]
1125  // that matches.
1126  int32 total_size = 3;
1127
1128  // A unique search token. This should be included in the
1129  // [UserEvent][google.cloud.discoveryengine.v1alpha.UserEvent] logs resulting
1130  // from this search, which enables accurate attribution of search model
1131  // performance.
1132  string attribution_token = 4;
1133
1134  // The URI of a customer-defined redirect page. If redirect action is
1135  // triggered, no search is performed, and only
1136  // [redirect_uri][google.cloud.discoveryengine.v1alpha.SearchResponse.redirect_uri]
1137  // and
1138  // [attribution_token][google.cloud.discoveryengine.v1alpha.SearchResponse.attribution_token]
1139  // are set in the response.
1140  string redirect_uri = 12;
1141
1142  // A token that can be sent as
1143  // [SearchRequest.page_token][google.cloud.discoveryengine.v1alpha.SearchRequest.page_token]
1144  // to retrieve the next page. If this field is omitted, there are no
1145  // subsequent pages.
1146  string next_page_token = 5;
1147
1148  // Contains the spell corrected query, if found. If the spell correction type
1149  // is AUTOMATIC, then the search results are based on corrected_query.
1150  // Otherwise the original query is used for search.
1151  string corrected_query = 7;
1152
1153  // A summary as part of the search results.
1154  // This field is only returned if
1155  // [SearchRequest.ContentSearchSpec.summary_spec][google.cloud.discoveryengine.v1alpha.SearchRequest.ContentSearchSpec.summary_spec]
1156  // is set.
1157  Summary summary = 9;
1158
1159  // Controls applied as part of the Control service.
1160  repeated string applied_controls = 10;
1161
      // Debug information specifically related to forward geocoding issues
      // arising from Geolocation Search.
1162  repeated GeoSearchDebugInfo geo_search_debug_info = 16;
1163
1164  // Query expansion information for the returned results.
1165  QueryExpansionInfo query_expansion_info = 14;
1166}
1167