// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.ai.generativelanguage.v1beta;

import "google/ai/generativelanguage/v1beta/citation.proto";
import "google/ai/generativelanguage/v1beta/safety.proto";
import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";

option go_package = "cloud.google.com/go/ai/generativelanguage/apiv1beta/generativelanguagepb;generativelanguagepb";
option java_multiple_files = true;
option java_outer_classname = "TextServiceProto";
option java_package = "com.google.ai.generativelanguage.v1beta";

// API for using Generative Language Models (GLMs) trained to generate text.
//
// Also known as Large Language Models (LLMs), these generate text given an
// input prompt from the user.
service TextService {
  option (google.api.default_host) = "generativelanguage.googleapis.com";

  // Generates a response from the model given an input message.
  rpc GenerateText(GenerateTextRequest) returns (GenerateTextResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:generateText"
      body: "*"
      additional_bindings {
        post: "/v1beta/{model=tunedModels/*}:generateText"
        body: "*"
      }
    };
    option (google.api.method_signature) =
        "model,prompt,temperature,candidate_count,max_output_tokens,top_p,top_k";
  }

  // Generates an embedding from the model given an input message.
  rpc EmbedText(EmbedTextRequest) returns (EmbedTextResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:embedText"
      body: "*"
    };
    option (google.api.method_signature) = "model,text";
  }

  // Generates multiple embeddings from the model given input text in a
  // synchronous call.
  rpc BatchEmbedText(BatchEmbedTextRequest) returns (BatchEmbedTextResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:batchEmbedText"
      body: "*"
    };
    option (google.api.method_signature) = "model,texts";
  }

  // Runs a model's tokenizer on the input text and returns the token count.
  rpc CountTextTokens(CountTextTokensRequest)
      returns (CountTextTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta/{model=models/*}:countTextTokens"
      body: "*"
    };
    option (google.api.method_signature) = "model,prompt";
  }
}
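
// Example: a minimal, illustrative sketch of calling `GenerateText` with the
// generated Python client (`google-ai-generativelanguage`); the model name is
// one of the examples in this file, and credentials are assumed to be
// already configured:
//
//   from google.ai import generativelanguage_v1beta as glm
//
//   client = glm.TextServiceClient()
//   response = client.generate_text(
//       model="models/text-bison-001",
//       prompt=glm.TextPrompt(text="Write a short poem about the sea."),
//   )
//   for candidate in response.candidates:
//       print(candidate.output)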

// Request to generate a text completion response from the model.
message GenerateTextRequest {
  // Required. The name of the `Model` or `TunedModel` to use for generating
  // the completion.
  // Examples:
  //  models/text-bison-001
  //  tunedModels/sentence-translator-u3b7m
  string model = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The free-form input text given to the model as a prompt.
  //
  // Given a prompt, the model will generate a `TextCompletion` response it
  // predicts as the completion of the input text.
  TextPrompt prompt = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Controls the randomness of the output.
  // Note: The default value varies by model; see the `Model.temperature`
  // attribute of the `Model` returned by the `getModel` function.
  //
  // Values can range from 0.0 to 1.0, inclusive. A value closer to 1.0 will
  // produce responses that are more varied and creative, while a value closer
  // to 0.0 will typically result in more straightforward responses from the
  // model.
  optional float temperature = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of generated responses to return.
  //
  // This value must be between 1 and 8, inclusive. If unset, this will
  // default to 1.
  optional int32 candidate_count = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to include in a candidate.
  //
  // If unset, this will default to the `output_token_limit` specified in the
  // `Model` specification.
  optional int32 max_output_tokens = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum cumulative probability of tokens to consider when
  // sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Tokens are sorted based on their assigned probabilities so that only the
  // most likely tokens are considered. Top-k sampling directly limits the
  // maximum number of tokens to consider, while nucleus sampling limits the
  // number of tokens based on the cumulative probability.
  //
  // Note: The default value varies by model; see the `Model.top_p`
  // attribute of the `Model` returned by the `getModel` function.
  optional float top_p = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The maximum number of tokens to consider when sampling.
  //
  // The model uses combined Top-k and nucleus sampling.
  //
  // Top-k sampling considers the set of `top_k` most probable tokens.
  // Defaults to 40.
  //
  // Note: The default value varies by model; see the `Model.top_k`
  // attribute of the `Model` returned by the `getModel` function.
  optional int32 top_k = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of unique `SafetySetting` instances for blocking unsafe
  // content that will be enforced on the `GenerateTextRequest.prompt` and
  // `GenerateTextResponse.candidates`.
  //
  // There should not be more than one setting for each `SafetyCategory` type.
  // The API will block any prompts and responses that fail to meet the
  // thresholds set by these settings. This list overrides the default
  // settings for each `SafetyCategory`; if the list contains no
  // `SafetySetting` for a given `SafetyCategory`, the API will use the
  // default safety setting for that category. The harm categories
  // HARM_CATEGORY_DEROGATORY, HARM_CATEGORY_TOXICITY, HARM_CATEGORY_VIOLENCE,
  // HARM_CATEGORY_SEXUAL, HARM_CATEGORY_MEDICAL, and HARM_CATEGORY_DANGEROUS
  // are supported in the text service.
  repeated SafetySetting safety_settings = 8
      [(google.api.field_behavior) = OPTIONAL];

  // The set of character sequences (up to 5) that will stop output generation.
  // If specified, the API will stop at the first appearance of a stop
  // sequence. The stop sequence will not be included as part of the response.
  repeated string stop_sequences = 9;
}
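
// Example: a hedged sketch of a fully populated `GenerateTextRequest`,
// exercising the optional sampling fields above (values are illustrative
// rather than recommended; `client` is the `TextServiceClient` from the
// sketch after the service definition):
//
//   request = glm.GenerateTextRequest(
//       model="models/text-bison-001",
//       prompt=glm.TextPrompt(text="List three uses for a paperclip."),
//       temperature=0.8,          # closer to 1.0 -> more varied output
//       candidate_count=2,        # must be between 1 and 8
//       max_output_tokens=256,
//       top_p=0.95,               # nucleus-sampling probability cutoff
//       top_k=40,                 # top-k sampling cutoff
//       stop_sequences=["\n\n"],  # stop at the first blank line
//   )
//   response = client.generate_text(request=request)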

// The response from the model, including candidate completions.
message GenerateTextResponse {
  // Candidate responses from the model.
  repeated TextCompletion candidates = 1;

  // A set of content filtering metadata for the prompt and response
  // text.
  //
  // This indicates which `SafetyCategory`(s) blocked a
  // candidate from this response, the lowest `HarmProbability`
  // that triggered a block, and the `HarmThreshold` setting for that
  // category. This indicates the smallest change to the `SafetySettings`
  // that would be necessary to unblock at least one response.
  //
  // The blocking is configured by the `SafetySettings` in the request (or the
  // default `SafetySettings` of the API).
  repeated ContentFilter filters = 3;

  // Returns any safety feedback related to content filtering.
  repeated SafetyFeedback safety_feedback = 4;
}

// Text given to the model as a prompt.
//
// The model will use this `TextPrompt` to generate a text completion.
message TextPrompt {
  // Required. The prompt text.
  string text = 1 [(google.api.field_behavior) = REQUIRED];
}

// Output text returned from a model.
message TextCompletion {
  // Output only. The generated text returned from the model.
  string output = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Ratings for the safety of a response.
  //
  // There is at most one rating per category.
  repeated SafetyRating safety_ratings = 2;

  // Output only. Citation information for model-generated `output` in this
  // `TextCompletion`.
  //
  // This field may be populated with attribution information for any text
  // included in the `output`.
  optional CitationMetadata citation_metadata = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request to get a text embedding from the model.
message EmbedTextRequest {
  // Required. The model name to use, with the format `model=models/{model}`.
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. The free-form input text that the model will turn into an
  // embedding.
  string text = 2 [(google.api.field_behavior) = OPTIONAL];
}

// The response to an `EmbedTextRequest`.
message EmbedTextResponse {
  // Output only. The embedding generated from the input text.
  optional Embedding embedding = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
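
// Example: a minimal sketch of `EmbedText` with the same generated Python
// client; the model name is the example given for `BatchEmbedTextRequest`
// below:
//
//   response = client.embed_text(
//       model="models/embedding-gecko-001",
//       text="Hello world",
//   )
//   print(len(response.embedding.value))  # dimensionality of the vector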

// Batch request to get text embeddings from the model.
message BatchEmbedTextRequest {
  // Required. The name of the `Model` to use for generating the embeddings.
  // Examples:
  //  models/embedding-gecko-001
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Optional. The free-form input texts that the model will turn into
  // embeddings. The current limit is 100 texts; requests with more will
  // return an error.
  repeated string texts = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Embed requests for the batch. Only one of `texts` or `requests`
  // can be set.
  repeated EmbedTextRequest requests = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// The response to a `BatchEmbedTextRequest`.
message BatchEmbedTextResponse {
  // Output only. The embeddings generated from the input texts.
  repeated Embedding embeddings = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
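
// Example: a sketch of `BatchEmbedText`; judging from the message shapes
// above, each input text should yield one `Embedding` in `embeddings`
// (an assumption, not stated in this file):
//
//   response = client.batch_embed_text(
//       model="models/embedding-gecko-001",
//       texts=["first sentence", "second sentence"],
//   )
//   for embedding in response.embeddings:
//       print(embedding.value[:4])  # first few values of each vector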

// A list of floats representing the embedding.
message Embedding {
  // The embedding values.
  repeated float value = 1;
}

// Counts the number of tokens in the `prompt` sent to a model.
//
// Models may tokenize text differently, so each model may return a different
// `token_count`.
message CountTextTokensRequest {
  // Required. The model's resource name. This serves as an ID for the Model to
  // use.
  //
  // This name should match a model name returned by the `ListModels` method.
  //
  // Format: `models/{model}`
  string model = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "generativelanguage.googleapis.com/Model"
    }
  ];

  // Required. The free-form input text given to the model as a prompt.
  TextPrompt prompt = 2 [(google.api.field_behavior) = REQUIRED];
}

// A response from `CountTextTokens`.
//
// It returns the model's `token_count` for the `prompt`.
message CountTextTokensResponse {
  // The number of tokens that the `model` tokenizes the `prompt` into.
  //
  // Always non-negative.
  int32 token_count = 1;
}
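
// Example: a sketch of `CountTextTokens`, e.g. for checking a prompt's size
// before calling `GenerateText` (`client` as in the earlier sketches):
//
//   response = client.count_text_tokens(
//       model="models/text-bison-001",
//       prompt=glm.TextPrompt(text="How many tokens is this sentence?"),
//   )
//   print(response.token_count)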