// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/content.proto";
import "google/cloud/aiplatform/v1beta1/explanation.proto";
import "google/cloud/aiplatform/v1beta1/tool.proto";
import "google/cloud/aiplatform/v1beta1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] that served
  // this prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a unary online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
      body: "*"
    };
  }

  // Perform a unary online prediction request to a gRPC model server for
  // custom containers.
  rpc DirectRawPredict(DirectRawPredictRequest)
      returns (DirectRawPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
      body: "*"
    };
  }

  // Perform a streaming online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc StreamDirectPredict(stream StreamDirectPredictRequest)
      returns (stream StreamDirectPredictResponse) {}

  // Perform a streaming online prediction request to a gRPC model server for
  // custom containers.
  rpc StreamDirectRawPredict(stream StreamDirectRawPredictRequest)
      returns (stream StreamDirectRawPredictResponse) {}

  // Perform a streaming online prediction request for Vertex first-party
  // products and frameworks.
  rpc StreamingPredict(stream StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {}

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform a streaming online prediction request through gRPC.
  rpc StreamingRawPredict(stream StreamingRawPredictRequest)
      returns (stream StreamingRawPredictResponse) {}

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployedModel must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }

  // Perform a token counting.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }

  // Generate content with multimodal inputs.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generate content with multimodal inputs with streaming support.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:streamGenerateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:streamGenerateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Exposes an OpenAI-compatible endpoint for chat completions.
  rpc ChatCompletions(ChatCompletionsRequest)
      returns (stream google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}/chat/completions"
      body: "http_body"
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }
}

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the prediction call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1beta1.Model.display_name] of the Model
  // which is deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] may have
  // an upper limit on the number of instances it supports per request. When
  // this limit is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1beta1.Model].
  // This schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] and use the
  // `RawPredict` method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamDirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input.
  repeated Tensor inputs = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The parameters that govern the prediction.
  Tensor parameters = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
message StreamDirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamDirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The prediction input.
  bytes input = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
message StreamDirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamingRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
message StreamingRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the explanation call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 predictions results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to reduce
  //    approximate errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // Optional. This field is the same as the one above, but supports multiple
  // explanations to occur in parallel. The key can be any string. Each override
  // will be run against the model, then its explanations will be grouped
  // together.
  //
  // Note - these explanations are run **In Addition** to the default
  // Explanation in the deployed model.
  map<string, ExplanationSpecOverride> concurrent_explanation_spec_override = 6
      [(google.api.field_behavior) = OPTIONAL];

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1beta1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainResponse {
  // This message is a wrapper grouping Concurrent Explanations.
  message ConcurrentExplanation {
    // The explanations of the Model's
    // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
    //
    // It has the same number of elements as
    // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to
    // be explained.
    repeated Explanation explanations = 1;
  }

  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // This field stores the results of the explanations run in parallel with
  // the default explanation strategy/method.
  map<string, ConcurrentExplanation> concurrent_explanations = 4;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}

// Request message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensRequest {
  // Required. The name of the Endpoint requested to perform token counting.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The instances that are the input to token counting call.
  // Schema is identical to the prediction schema of the underlying model.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input content.
  repeated Content contents = 4 [(google.api.field_behavior) = REQUIRED];
}

// Response message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensResponse {
  // The total number of tokens counted across all instances from the request.
  int32 total_tokens = 1;

  // The total number of billable characters counted across all instances from
  // the request.
  int32 total_billable_characters = 2;
}

// Request message for [PredictionService.GenerateContent].
message GenerateContentRequest {
  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The user provided system instructions for the model.
  // Note: only text should be used in parts and content in each part will be in
  // a separate paragraph.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model.
  repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Tool config. This config is shared for all tools provided in the
  // request.
  ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Per request settings for blocking unsafe content.
  // Enforced on GenerateContentResponse.candidates.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Generation config.
  GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for [PredictionService.GenerateContent].
message GenerateContentResponse {
  // Content filter results for a prompt sent in the request.
  message PromptFeedback {
    // Blocked reason enumeration.
    enum BlockedReason {
      // Unspecified blocked reason.
      BLOCKED_REASON_UNSPECIFIED = 0;

      // Candidates blocked due to safety.
      SAFETY = 1;

      // Candidates blocked due to other reason.
      OTHER = 2;

      // Candidates blocked due to the terms which are included from the
      // terminology blocklist.
      BLOCKLIST = 3;

      // Candidates blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;
    }

    // Output only. Blocked reason.
    BlockedReason block_reason = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Safety ratings.
    repeated SafetyRating safety_ratings = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A readable block reason message.
    string block_reason_message = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Usage metadata about response(s).
  message UsageMetadata {
    // Number of tokens in the request.
    int32 prompt_token_count = 1;

    // Number of tokens in the response(s).
    int32 candidates_token_count = 2;

    // Total token count (prompt plus response candidates).
    int32 total_token_count = 3;
  }

  // Output only. Generated candidates.
  repeated Candidate candidates = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Content filter results for a prompt sent in the request.
  // Note: Sent only in the first stream chunk.
  // Only happens when no candidates were generated due to content violations.
  PromptFeedback prompt_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Usage metadata about the response(s).
  UsageMetadata usage_metadata = 4;
}

// Request message for [PredictionService.ChatCompletions]
message ChatCompletionsRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/openapi`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input. Supports HTTP headers and arbitrary data
  // payload.
  google.api.HttpBody http_body = 2 [(google.api.field_behavior) = OPTIONAL];
}