// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1;

import "google/ai/generativelanguage/v1/citation.proto";
import "google/ai/generativelanguage/v1/content.proto";
import "google/ai/generativelanguage/v1/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "GenerativeServiceProto";
option java_package = "com.google.ai.generativelanguage.v1";

// API for using Large Models that generate multimodal content and have
// additional capabilities beyond text generation.
service GenerativeService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input
  // `GenerateContentRequest`.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=tunedModels/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates a streamed response from the model given an input
  // `GenerateContentRequest`.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:streamGenerateContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generates an embedding from the model given an input `Content`.
  rpc EmbedContent(EmbedContentRequest) returns (EmbedContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:embedContent"
      body: "*"
    };
    option (google.api.method_signature) = "model,content";
  }

  // Generates multiple embeddings from the model given input text in a
  // synchronous call.
  rpc BatchEmbedContents(BatchEmbedContentsRequest)
      returns (BatchEmbedContentsResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:batchEmbedContents"
      body: "*"
    };
    option (google.api.method_signature) = "model,requests";
  }

  // Runs a model's tokenizer on input content and returns the token count.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1/{model=models/*}:countTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,contents";
  }
}

// Type of task for which the embedding will be used.
enum TaskType {
  // Unset value, which will default to one of the other enum values.
  TASK_TYPE_UNSPECIFIED = 0;

  // Specifies the given text is a query in a search/retrieval setting.
  RETRIEVAL_QUERY = 1;

  // Specifies the given text is a document from the corpus being searched.
  RETRIEVAL_DOCUMENT = 2;

  // Specifies the given text will be used for STS.
  SEMANTIC_SIMILARITY = 3;

  // Specifies that the given text will be classified.
  CLASSIFICATION = 4;

  // Specifies that the embeddings will be used for clustering.
  CLUSTERING = 5;

  // Specifies that the given text will be used for question answering.
  QUESTION_ANSWERING = 6;

  // Specifies that the given text will be used for fact verification.
  FACT_VERIFICATION = 7;
}

// Request to generate a completion from the model.
message GenerateContentRequest {
  // Required. The name of the `Model` to use for generating the completion.
  //
  // Format: `name=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content.
  //
  // This will be enforced on the `GenerateContentRequest.contents` and
  // `GenerateContentResponse.candidates`. There should not be more than one
  // setting for each `SafetyCategory` type. The API will block any contents and
  // responses that fail to meet the thresholds set by these settings. This list
  // overrides the default settings for each `SafetyCategory` specified in the
  // safety_settings. If there is no `SafetySetting` for a given
  // `SafetyCategory` provided in the list, the API will use the default safety
  // setting for that category. Harm categories HARM_CATEGORY_HATE_SPEECH,
  // HARM_CATEGORY_SEXUALLY_EXPLICIT, HARM_CATEGORY_DANGEROUS_CONTENT,
  // HARM_CATEGORY_HARASSMENT are supported.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration options for model generation and outputs.
  optional GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Configuration options for model generation and outputs. Not all parameters
// may be configurable for every model.
message GenerationConfig {
  // Optional. Number of generated responses to return.
  //
  // Currently, this value can only be set to 1. If unset, this will default
  // to 1.
  optional int32 candidate_count = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of character sequences (up to 5) that will stop output
  // generation. If specified, the API will stop at the first appearance of a
  // stop sequence. The stop sequence will not be included as part of the
  // response.
  repeated string stop_sequences = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // Note: The default value varies by model, see the `Model.output_token_limit`
  // attribute of the `Model` returned from the `getModel` function.
  optional int32 max_output_tokens = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Controls the randomness of the output.
  //
  // Note: The default value varies by model, see the `Model.temperature`
  // attribute of the `Model` returned from the `getModel` function.
  //
  // Values can range from [0.0, 2.0].
  optional float temperature = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while Nucleus sampling limits number
  // of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model, see the `Model.top_p`
  // attribute of the `Model` returned from the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // Models use nucleus sampling or combined Top-k and nucleus sampling.
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Models running with nucleus sampling don't allow top_k setting.
  //
  // Note: The default value varies by model, see the `Model.top_k`
  // attribute of the `Model` returned from the `getModel` function. Empty
  // `top_k` field in `Model` indicates the model doesn't apply top-k sampling
  // and doesn't allow setting `top_k` on requests.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Response from the model supporting multiple candidates.
//
// Note on safety ratings and content filtering. They are reported for both
// prompt in `GenerateContentResponse.prompt_feedback` and for each candidate
// in `finish_reason` and in `safety_ratings`. The API contract is that:
// - either all requested candidates are returned or no candidates at all
// - no candidates are returned only if there was something wrong with the
//   prompt (see `prompt_feedback`)
// - feedback on each candidate is reported on `finish_reason` and
//   `safety_ratings`.
message GenerateContentResponse {
  // A set of the feedback metadata the prompt specified in
  // `GenerateContentRequest.content`.
  message PromptFeedback {
    // Specifies what was the reason why prompt was blocked.
    enum BlockReason {
      // Default value. This value is unused.
      BLOCK_REASON_UNSPECIFIED = 0;

      // Prompt was blocked due to safety reasons. You can inspect
      // `safety_ratings` to understand which safety category blocked it.
      SAFETY = 1;

      // Prompt was blocked due to unknown reasons.
      OTHER = 2;
    }

    // Optional. If set, the prompt was blocked and no candidates are returned.
    // Rephrase your prompt.
    BlockReason block_reason = 1 [(google.api.field_behavior) = OPTIONAL];

    // Ratings for safety of the prompt.
    // There is at most one rating per category.
    repeated SafetyRating safety_ratings = 2;
  }

  // Candidate responses from the model.
  repeated Candidate candidates = 1;

  // Returns the prompt's feedback related to the content filters.
  PromptFeedback prompt_feedback = 2;
}

// A response candidate generated from the model.
message Candidate {
  // Defines the reason why the model stopped generating tokens.
  enum FinishReason {
    // Default value. This value is unused.
    FINISH_REASON_UNSPECIFIED = 0;

    // Natural stop point of the model or provided stop sequence.
    STOP = 1;

    // The maximum number of tokens as specified in the request was reached.
    MAX_TOKENS = 2;

    // The candidate content was flagged for safety reasons.
    SAFETY = 3;

    // The candidate content was flagged for recitation reasons.
    RECITATION = 4;

    // Unknown reason.
    OTHER = 5;
  }

  // Output only. Index of the candidate in the list of candidates.
  optional int32 index = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Generated content returned from the model.
  Content content = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. The reason why the model stopped generating tokens.
  //
  // If empty, the model has not stopped generating the tokens.
  FinishReason finish_reason = 2 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.field_behavior) = OUTPUT_ONLY
  ];

  // List of ratings for the safety of a response candidate.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 5;

  // Output only. Citation information for model-generated candidate.
  //
  // This field may be populated with recitation information for any text
  // included in the `content`. These are passages that are "recited" from
  // copyrighted material in the foundational LLM's training data.
  CitationMetadata citation_metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Token count for this candidate.
  int32 token_count = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request containing the `Content` for the model to embed.
message EmbedContentRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The content to embed. Only the `parts.text` fields will be
  // counted.
  Content content = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Optional task type for which the embeddings will be used. Can
  // only be set for `models/embedding-001`.
  optional TaskType task_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. An optional title for the text. Only applicable when TaskType is
  // `RETRIEVAL_DOCUMENT`.
  //
  // Note: Specifying a `title` for `RETRIEVAL_DOCUMENT` provides better quality
  // embeddings for retrieval.
  optional string title = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Optional reduced dimension for the output embedding. If set,
  // excessive values in the output embedding are truncated from the end.
  // Supported by `models/text-embedding-latest`.
  optional int32 output_dimensionality = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A list of floats representing an embedding.
message ContentEmbedding {
  // The embedding values.
  repeated float values = 1;
}

// The response to an `EmbedContentRequest`.
message EmbedContentResponse {
  // Output only. The embedding generated from the input content.
  ContentEmbedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Batch request to get embeddings from the model for a list of prompts.
message BatchEmbedContentsRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. Embed requests for the batch. The model in each of these requests
  // must match the model specified `BatchEmbedContentsRequest.model`.
  repeated EmbedContentRequest requests = 2
      [(google.api.field_behavior) = REQUIRED];
}

// The response to a `BatchEmbedContentsRequest`.
message BatchEmbedContentsResponse {
  // Output only. The embeddings for each request, in the same order as provided
  // in the batch request.
  repeated ContentEmbedding embeddings = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The input given to the model as a prompt.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];
}

// A response from `CountTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 total_tokens = 1;
}