// xref: /aosp_15_r20/external/googleapis/google/ai/generativelanguage/v1beta/retriever.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
//
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta;

// Provide the `google.api.field_behavior` and `google.api.resource`
// annotations and the `google.protobuf.Timestamp` type used in this file.
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

// Code-generation options for Go and Java.
option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "RetrieverProto";
option java_package = "com.google.ai.generativelanguage.v1beta";

// A `Corpus` is a collection of `Document`s.
// A project can create up to 5 corpora.
message Corpus {
  option (google.api.resource) = {
    type: "generativelanguage.googleapis.com/Corpus"
    pattern: "corpora/{corpus}"
    plural: "corpora"
    singular: "corpus"
  };

  // Immutable. Identifier. The `Corpus` resource name. The ID (name excluding
  // the "corpora/" prefix) can contain up to 40 characters that are lowercase
  // alphanumeric or dashes (-). The ID cannot start or end with a dash. If the
  // name is empty on create, a unique name will be derived from `display_name`
  // along with a 12 character random suffix.
  // Example: `corpora/my-awesome-corpora-123a456b789c`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The human-readable display name for the `Corpus`. The display
  // name must be no more than 512 characters in length, including spaces.
  // Example: "Docs on Semantic Retriever"
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The Timestamp of when the `Corpus` was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The Timestamp of when the `Corpus` was last updated.
  google.protobuf.Timestamp update_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A `Document` is a collection of `Chunk`s.
// A `Corpus` can have a maximum of 10,000 `Document`s.
message Document {
  option (google.api.resource) = {
    type: "generativelanguage.googleapis.com/Document"
    pattern: "corpora/{corpus}/documents/{document}"
    plural: "documents"
    singular: "document"
  };

  // Immutable. Identifier. The `Document` resource name. The ID (name excluding
  // the "corpora/*/documents/" prefix) can contain up to 40 characters that are
  // lowercase alphanumeric or dashes (-). The ID cannot start or end with a
  // dash. If the name is empty on create, a unique name will be derived from
  // `display_name` along with a 12 character random suffix.
  // Example: `corpora/{corpus_id}/documents/my-awesome-doc-123a456b789c`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The human-readable display name for the `Document`. The display
  // name must be no more than 512 characters in length, including spaces.
  // Example: "Semantic Retriever Documentation"
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User provided custom metadata stored as key-value pairs used for
  // querying. A `Document` can have a maximum of 20 `CustomMetadata`.
  repeated CustomMetadata custom_metadata = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. The Timestamp of when the `Document` was last updated.
  google.protobuf.Timestamp update_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The Timestamp of when the `Document` was created.
  google.protobuf.Timestamp create_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// User provided string values assigned to a single metadata key.
message StringList {
  // The string values of the metadata to store.
  repeated string values = 1;
}

// User provided metadata stored as key-value pairs.
message CustomMetadata {
  // The value paired with `key`. As a `oneof`, at most one of the following
  // fields is set at a time.
  oneof value {
    // The string value of the metadata to store.
    string string_value = 2;

    // The StringList value of the metadata to store.
    StringList string_list_value = 6;

    // The numeric value of the metadata to store.
    float numeric_value = 7;
  }

  // Required. The key of the metadata to store.
  string key = 1 [(google.api.field_behavior) = REQUIRED];
}

// User provided filter to limit retrieval based on `Chunk` or `Document` level
// metadata values.
// Example (genre = drama OR genre = action):
//   key = "document.custom_metadata.genre"
//   conditions = [{string_value = "drama", operation = EQUAL},
//                 {string_value = "action", operation = EQUAL}]
message MetadataFilter {
  // Required. The key of the metadata to filter on.
  string key = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The `Condition`s for the given key that will trigger this filter.
  // Multiple `Condition`s are joined by logical ORs.
  repeated Condition conditions = 2 [(google.api.field_behavior) = REQUIRED];
}

// Filter condition applicable to a single key.
message Condition {
  // Defines the valid operators that can be applied to a key-value pair.
  enum Operator {
    // The default value. This value is unused.
    OPERATOR_UNSPECIFIED = 0;

    // Supported by numeric.
    LESS = 1;

    // Supported by numeric.
    LESS_EQUAL = 2;

    // Supported by numeric & string.
    EQUAL = 3;

    // Supported by numeric.
    GREATER_EQUAL = 4;

    // Supported by numeric.
    GREATER = 5;

    // Supported by numeric & string.
    NOT_EQUAL = 6;

    // Supported by string only when `CustomMetadata` value type for the given
    // key has a `string_list_value`.
    INCLUDES = 7;

    // Supported by string only when `CustomMetadata` value type for the given
    // key has a `string_list_value`.
    EXCLUDES = 8;
  }

  // The value type must be consistent with the value type defined in the field
  // for the corresponding key. If the value types are not consistent, the
  // result will be an empty set. When the `CustomMetadata` has a `StringList`
  // value type, the filtering condition should use `string_value` paired with
  // an INCLUDES/EXCLUDES operation, otherwise the result will also be an empty
  // set.
  oneof value {
    // The string value to filter the metadata on.
    string string_value = 1;

    // The numeric value to filter the metadata on.
    float numeric_value = 6;
  }

  // Required. Operator applied to the given key-value pair to trigger the
  // condition.
  Operator operation = 5 [(google.api.field_behavior) = REQUIRED];
}

// A `Chunk` is a subpart of a `Document` that is treated as an independent unit
// for the purposes of vector representation and storage.
// A `Corpus` can have a maximum of 1 million `Chunk`s.
message Chunk {
  option (google.api.resource) = {
    type: "generativelanguage.googleapis.com/Chunk"
    pattern: "corpora/{corpus}/documents/{document}/chunks/{chunk}"
    plural: "chunks"
    singular: "chunk"
  };

  // States for the lifecycle of a `Chunk`.
  enum State {
    // The default value. This value is used if the state is omitted.
    STATE_UNSPECIFIED = 0;

    // `Chunk` is being processed (embedding and vector storage).
    STATE_PENDING_PROCESSING = 1;

    // `Chunk` is processed and available for querying.
    STATE_ACTIVE = 2;

    // `Chunk` failed processing.
    STATE_FAILED = 10;
  }

  // Immutable. Identifier. The `Chunk` resource name. The ID (name excluding
  // the "corpora/*/documents/*/chunks/" prefix) can contain up to 40 characters
  // that are lowercase alphanumeric or dashes (-). The ID cannot start or end
  // with a dash. If the name is empty on create, a random 12-character unique
  // ID will be generated.
  // Example: `corpora/{corpus_id}/documents/{document_id}/chunks/123a456b789c`
  string name = 1 [
    (google.api.field_behavior) = IDENTIFIER,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. The content for the `Chunk`, such as the text string.
  // The maximum number of tokens per chunk is 2043.
  ChunkData data = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. User provided custom metadata stored as key-value pairs.
  // The maximum number of `CustomMetadata` per chunk is 20.
  repeated CustomMetadata custom_metadata = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. The Timestamp of when the `Chunk` was created.
  google.protobuf.Timestamp create_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The Timestamp of when the `Chunk` was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Current state of the `Chunk`.
  State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Extracted data that represents the `Chunk` content.
message ChunkData {
  // The content payload. Only a string form is declared in this `oneof`.
  oneof data {
    // The `Chunk` content as a string.
    // The maximum number of tokens per chunk is 2043.
    string string_value = 1;
  }
}