// xref: /aosp_15_r20/external/googleapis/google/cloud/aiplatform/v1/prediction_service.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/content.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/tool.proto";
import "google/cloud/aiplatform/v1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] that served this
  // prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a streaming online prediction with an arbitrary HTTP payload.
  rpc StreamRawPredict(StreamRawPredictRequest)
      returns (stream google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:streamRawPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:streamRawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform an unary online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
      body: "*"
    };
  }

  // Perform an unary online prediction request to a gRPC model server for
  // custom containers.
  rpc DirectRawPredict(DirectRawPredictRequest)
      returns (DirectRawPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
      body: "*"
    };
  }

  // Perform a streaming online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc StreamDirectPredict(stream StreamDirectPredictRequest)
      returns (stream StreamDirectPredictResponse) {}

  // Perform a streaming online prediction request to a gRPC model server for
  // custom containers.
  rpc StreamDirectRawPredict(stream StreamDirectRawPredictRequest)
      returns (stream StreamDirectRawPredictResponse) {}

  // Perform a streaming online prediction request for Vertex first-party
  // products and frameworks.
  rpc StreamingPredict(stream StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {}

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform a streaming online prediction request through gRPC.
  rpc StreamingRawPredict(stream StreamingRawPredictRequest)
      returns (stream StreamingRawPredictResponse) {}

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployModel must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }

  // Generate content with multimodal inputs.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=projects/*/locations/*/endpoints/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=projects/*/locations/*/publishers/*/models/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generate content with multimodal inputs with streaming support.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=projects/*/locations/*/endpoints/*}:streamGenerateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=projects/*/locations/*/publishers/*/models/*}:streamGenerateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }
}

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the prediction call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels' [Model's ][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1.Model.display_name] of the Model which is
  // deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] may have an
  // upper limit on the number of instances it supports per request. When this
  // limit it is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1.Model]. This
  // schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] and use the `RawPredict`
  // method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.StreamRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamRawPredict].
message StreamRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
message DirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
message DirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
message DirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
message DirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamDirectPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamDirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input.
  repeated Tensor inputs = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The parameters that govern the prediction.
  Tensor parameters = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectPredict].
message StreamDirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.input]. The
// subsequent messages must contain
// [input][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamDirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The prediction input.
  bytes input = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectRawPredict].
message StreamDirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamingPredictRequest.endpoint] field
// and optionally [input][]. The subsequent messages must contain [input][].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamingRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input]. The
// subsequent messages must contain
// [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamingRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
message StreamingRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the explanation call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 predictions results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to reduce
  //    approximate errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainResponse {
  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}

// Request message for [PredictionService.CountTokens][].
message CountTokensRequest {
  // Required. The name of the Endpoint requested to perform token counting.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The instances that are the input to token counting call.
  // Schema is identical to the prediction schema of the underlying model.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input content.
  repeated Content contents = 4 [(google.api.field_behavior) = REQUIRED];
}

// Response message for [PredictionService.CountTokens][].
message CountTokensResponse {
  // The total number of tokens counted across all instances from the request.
  int32 total_tokens = 1;

  // The total number of billable characters counted across all instances from
  // the request.
  int32 total_billable_characters = 2;
}

// Request message for [PredictionService.GenerateContent].
message GenerateContentRequest {
  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The user provided system instructions for the model.
  // Note: only text should be used in parts and content in each part will be in
  // a separate paragraph.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model.
  repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Per request settings for blocking unsafe content.
  // Enforced on GenerateContentResponse.candidates.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Generation config.
  GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for [PredictionService.GenerateContent].
message GenerateContentResponse {
  // Content filter results for a prompt sent in the request.
  message PromptFeedback {
    // Blocked reason enumeration.
    enum BlockedReason {
      // Unspecified blocked reason.
      BLOCKED_REASON_UNSPECIFIED = 0;

      // Candidates blocked due to safety.
      SAFETY = 1;

      // Candidates blocked due to other reason.
      OTHER = 2;

      // Candidates blocked due to the terms which are included from the
      // terminology blocklist.
      BLOCKLIST = 3;

      // Candidates blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;
    }

    // Output only. Blocked reason.
    BlockedReason block_reason = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Safety ratings.
    repeated SafetyRating safety_ratings = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A readable block reason message.
    string block_reason_message = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Usage metadata about response(s).
  message UsageMetadata {
    // Number of tokens in the request.
    int32 prompt_token_count = 1;

    // Number of tokens in the response(s).
    int32 candidates_token_count = 2;

    // Total token count — presumably prompt_token_count plus
    // candidates_token_count; upstream definition does not state it here, so
    // confirm against the service implementation.
    int32 total_token_count = 3;
  }

  // Output only. Generated candidates.
  repeated Candidate candidates = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Content filter results for a prompt sent in the request.
  // Note: Sent only in the first stream chunk.
  // Only happens when no candidates were generated due to content violations.
  PromptFeedback prompt_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Usage metadata about the response(s).
  UsageMetadata usage_metadata = 4;
}
