// xref: /aosp_15_r20/external/googleapis/google/ai/generativelanguage/v1/generative_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.ai.generativelanguage.v1;
18
19import "google/ai/generativelanguage/v1/citation.proto";
20import "google/ai/generativelanguage/v1/content.proto";
21import "google/ai/generativelanguage/v1/safety.proto";
22import "google/api/annotations.proto";
23import "google/api/client.proto";
24import "google/api/field_behavior.proto";
25import "google/api/resource.proto";
26
27option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1/generativelanguagepb;generativelanguagepb";
28option java_multiple_files = true;
29option java_outer_classname = "GenerativeServiceProto";
30option java_package = "com.google.ai.generativelanguage.v1";
31
// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input
  // `GenerateContentRequest`.
  //
  // Exposed over REST for both base models (`models/*`) and, via the
  // additional binding, tuned models (`tunedModels/*`).
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a streamed response from the model given an input
  // `GenerateContentRequest`. The server returns a stream of
  // `GenerateContentResponse` messages instead of a single response.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates an embedding from the model given an input `Content`.
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embeddings from the model given input text in a
  // synchronous call.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input content and returns the token count.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}
92
// Type of task for which the embedding will be used.
//
// NOTE(review): only the zero value carries the `TASK_TYPE_` prefix; the
// other values are unprefixed, which deviates from proto enum style. They
// are part of the published API surface and cannot be renamed.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for STS (semantic textual
  // similarity).
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;

  // Specifies that the given text will be used for question answering.
  QUESTION_ANSWERING = 6;

  // Specifies that the given text will be used for fact verification.
  FACT_VERIFICATION = 7;
}
119
// Request to generate a completion from the model.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `name=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  // When unset, the model's defaults apply (see `GenerationConfig`).
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}
159
// Configuration options for model generation and outputs. Not all parameters
// may be configurable for every model.
//
// NOTE(review): field number 3 is unused in this view — presumably removed
// upstream; it should be `reserved` before any reuse. Confirm against the
// canonical googleapis source.
message GenerationConfig {
  // Optional. Number of generated responses to return.
  //
  // Currently, this value can only be set to 1. If unset, this will default
  // to 1.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // stop sequence. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // Note: The default value varies by model, see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 2.0].
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits number
  // of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model, see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // Models use nucleus sampling or combined Top-k and nucleus sampling.
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Models running with nucleus sampling don't allow top_k setting.
  //
  // Note: The default value varies by model, see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function. Empty
  // `top_k` field in `Model` indicates the model doesn't apply top-k sampling
  // and doesn't allow setting `top_k` on requests.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];
}
215
// Response from the model supporting multiple candidates.
//
// Note on safety ratings and content filtering. They are reported for both
// prompt in `GenerateContentResponse.prompt_feedback` and for each candidate
// in `finish_reason` and in `safety_ratings`. The API contract is that:
//  - either all requested candidates are returned or no candidates at all
//  - no candidates are returned only if there was something wrong with the
//    prompt (see `prompt_feedback`)
//  - feedback on each candidate is reported on `finish_reason` and
//    `safety_ratings`.
message GenerateContentResponse {
  // A set of the feedback metadata for the prompt specified in
  // `GenerateContentRequest.contents`.
  message PromptFeedback {
    // Specifies what was the reason why prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase your prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate responses from the model. Empty only when the prompt itself
  // was blocked (see `prompt_feedback.block_reason`).
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;
}
258
// A response candidate generated from the model.
//
// NOTE(review): fields are declared in an order different from their numbers
// (`index = 3` first); the wire format uses numbers only, so this is purely
// cosmetic. Field number 4 is unused in this view — presumably removed
// upstream; it should be `reserved` before any reuse.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // Unknown reason.
    OTHER = 5;
  }

  // Output only. Index of the candidate in the list of candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating tokens.
  //
  // If empty, the model has not stopped generating the tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}
312
// Request containing the `Content` for the model to embed.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Optional task type for which the embeddings will be used. Can
  // only be set for `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An optional title for the text. Only applicable when TaskType is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
  // embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Optional reduced dimension for the output embedding. If set,
  // excessive values in the output embedding are truncated from the end.
  // Supported by `models/text-embedding-latest`.
  optional int32 output_dimensionality = 5
      [(google.api.field_behavior) = OPTIONAL];
}
349
// A list of floats representing an embedding.
message ContentEmbedding {
  // The embedding values, one float per dimension of the embedding vector.
  repeated float values = 1;
}
355
// The response to an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
361
// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these requests
  // must match the model specified `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}
382
// The response to a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as provided
  // in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
390
// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The input given to the model as a prompt.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];
}
412
// A response from `CountTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 total_tokens = 1;
}
422