xref: /aosp_15_r20/external/googleapis/google/cloud/aiplatform/v1beta1/prediction_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/content.proto";
import "google/cloud/aiplatform/v1beta1/explanation.proto";
import "google/cloud/aiplatform/v1beta1/tool.proto";
import "google/cloud/aiplatform/v1beta1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] that served
  // this prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform an unary online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
      body: "*"
    };
  }

  // Perform an unary online prediction request to a gRPC model server for
  // custom containers.
  rpc DirectRawPredict(DirectRawPredictRequest)
      returns (DirectRawPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
      body: "*"
    };
  }

  // Perform a streaming online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc StreamDirectPredict(stream StreamDirectPredictRequest)
      returns (stream StreamDirectPredictResponse) {}

  // Perform a streaming online prediction request to a gRPC model server for
  // custom containers.
  rpc StreamDirectRawPredict(stream StreamDirectRawPredictRequest)
      returns (stream StreamDirectRawPredictResponse) {}

  // Perform a streaming online prediction request for Vertex first-party
  // products and frameworks.
  rpc StreamingPredict(stream StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {}

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform a streaming online prediction request through gRPC.
  rpc StreamingRawPredict(stream StreamingRawPredictRequest)
      returns (stream StreamingRawPredictResponse) {}

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployModel must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }

  // Perform a token counting.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }

  // Generate content with multimodal inputs.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generate content with multimodal inputs with streaming support.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:streamGenerateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:streamGenerateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Exposes an OpenAI-compatible endpoint for chat completions.
  rpc ChatCompletions(ChatCompletionsRequest)
      returns (stream google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}/chat/completions"
      body: "http_body"
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }
}

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the prediction call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1beta1.Model.display_name] of the Model
  // which is deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] may have
  // an upper limit on the number of instances it supports per request. When
  // this limit it is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1beta1.Model].
  // This schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] and use the
  // `RawPredict` method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamDirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input.
  repeated Tensor inputs = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The parameters that govern the prediction.
  Tensor parameters = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
message StreamDirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamDirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The prediction input.
  bytes input = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
message StreamDirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamingRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
message StreamingRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the explanation call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 predictions results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to reduce
  //    approximate errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // Optional. This field is the same as the one above, but supports multiple
  // explanations to occur in parallel. The key can be any string. Each override
  // will be run against the model, then its explanations will be grouped
  // together.
  //
  // Note - these explanations are run **In Addition** to the default
  // Explanation in the deployed model.
  map<string, ExplanationSpecOverride> concurrent_explanation_spec_override = 6
      [(google.api.field_behavior) = OPTIONAL];

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1beta1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainResponse {
  // This message is a wrapper grouping Concurrent Explanations.
  message ConcurrentExplanation {
    // The explanations of the Model's
    // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
    //
    // It has the same number of elements as
    // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to
    // be explained.
    repeated Explanation explanations = 1;
  }

  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // This field stores the results of the explanations run in parallel with
  // The default explanation strategy/method.
  map<string, ConcurrentExplanation> concurrent_explanations = 4;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}

// Request message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensRequest {
  // Required. The name of the Endpoint requested to perform token counting.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The instances that are the input to token counting call.
  // Schema is identical to the prediction schema of the underlying model.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input content.
  repeated Content contents = 4 [(google.api.field_behavior) = REQUIRED];
}

// Response message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensResponse {
  // The total number of tokens counted across all instances from the request.
  int32 total_tokens = 1;

  // The total number of billable characters counted across all instances from
  // the request.
  int32 total_billable_characters = 2;
}

// Request message for [PredictionService.GenerateContent].
message GenerateContentRequest {
  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The user provided system instructions for the model.
  // Note: only text should be used in parts and content in each part will be in
  // a separate paragraph.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model.
  repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Tool config. This config is shared for all tools provided in the
  // request.
  ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Per request settings for blocking unsafe content.
  // Enforced on GenerateContentResponse.candidates.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Generation config.
  GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for [PredictionService.GenerateContent].
message GenerateContentResponse {
  // Content filter results for a prompt sent in the request.
  message PromptFeedback {
    // Blocked reason enumeration.
    enum BlockedReason {
      // Unspecified blocked reason.
      BLOCKED_REASON_UNSPECIFIED = 0;

      // Candidates blocked due to safety.
      SAFETY = 1;

      // Candidates blocked due to other reason.
      OTHER = 2;

      // Candidates blocked due to the terms which are included from the
      // terminology blocklist.
      BLOCKLIST = 3;

      // Candidates blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;
    }

    // Output only. Blocked reason.
    BlockedReason block_reason = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Safety ratings.
    repeated SafetyRating safety_ratings = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A readable block reason message.
    string block_reason_message = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Usage metadata about response(s).
  message UsageMetadata {
    // Number of tokens in the request.
    int32 prompt_token_count = 1;

    // Number of tokens in the response(s).
    int32 candidates_token_count = 2;

    // Total token count for the prompt and the response candidate(s).
    int32 total_token_count = 3;
  }

  // Output only. Generated candidates.
  repeated Candidate candidates = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Content filter results for a prompt sent in the request.
  // Note: Sent only in the first stream chunk.
  // Only happens when no candidates were generated due to content violations.
  PromptFeedback prompt_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Usage metadata about the response(s).
  UsageMetadata usage_metadata = 4;
}

// Request message for [PredictionService.ChatCompletions]
message ChatCompletionsRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/openapi`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input. Supports HTTP headers and arbitrary data
  // payload.
  google.api.HttpBody http_body = 2 [(google.api.field_behavior) = OPTIONAL];
}