// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1/content.proto";
import "google/cloud/aiplatform/v1/explanation.proto";
import "google/cloud/aiplatform/v1/tool.proto";
import "google/cloud/aiplatform/v1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] that served this
  // prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a streaming online prediction with an arbitrary HTTP payload.
  rpc StreamRawPredict(StreamRawPredictRequest)
      returns (stream google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:streamRawPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:streamRawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform an unary online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
      body: "*"
    };
  }

  // Perform an unary online prediction request to a gRPC model server for
  // custom containers.
  rpc DirectRawPredict(DirectRawPredictRequest)
      returns (DirectRawPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
      body: "*"
    };
  }

  // Perform a streaming online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc StreamDirectPredict(stream StreamDirectPredictRequest)
      returns (stream StreamDirectPredictResponse) {}

  // Perform a streaming online prediction request to a gRPC model server for
  // custom containers.
  rpc StreamDirectRawPredict(stream StreamDirectRawPredictRequest)
      returns (stream StreamDirectRawPredictResponse) {}

  // Perform a streaming online prediction request for Vertex first-party
  // products and frameworks.
  rpc StreamingPredict(stream StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {}

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform a streaming online prediction request through gRPC.
  rpc StreamingRawPredict(stream StreamingRawPredictRequest)
      returns (stream StreamingRawPredictResponse) {}

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployModel must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }

  // Generate content with multimodal inputs.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=projects/*/locations/*/endpoints/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=projects/*/locations/*/publishers/*/models/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generate content with multimodal inputs with streaming support.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1/{model=projects/*/locations/*/endpoints/*}:streamGenerateContent"
      body: "*"
      additional_bindings {
        post: "/v1/{model=projects/*/locations/*/publishers/*/models/*}:streamGenerateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }
}

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the prediction call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters
  // may be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels' [Model's ][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1.Model.display_name] of the Model which is
  // deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] may have an
  // upper limit on the number of instances it supports per request. When this
  // limit it is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1.Model]. This
  // schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1.Endpoint] and use the `RawPredict`
  // method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.StreamRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamRawPredict].
message StreamRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
message DirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1.PredictionService.DirectPredict].
message DirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
message DirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1.PredictionService.DirectRawPredict].
message DirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamDirectPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamDirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input.
  repeated Tensor inputs = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The parameters that govern the prediction.
  Tensor parameters = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectPredict].
message StreamDirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.input]. The
// subsequent messages must contain
// [input][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1.StreamDirectRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamDirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The prediction input.
  bytes input = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamDirectRawPredict].
message StreamDirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamingPredictRequest.endpoint] field
// and optionally [input][]. The subsequent messages must contain [input][].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1.StreamingRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input]. The
// subsequent messages must contain
// [input][google.cloud.aiplatform.v1.StreamingRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1.StreamingRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamingRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1.PredictionService.StreamingRawPredict].
message StreamingRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the explanation call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels' [Model's][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters
  // may be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 predictions results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to
  //    reduce approximate errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1.PredictionService.Explain].
message ExplainResponse {
  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}

// Request message for [PredictionService.CountTokens][].
message CountTokensRequest {
  // Required. The name of the Endpoint requested to perform token counting.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The instances that are the input to token counting call.
  // Schema is identical to the prediction schema of the underlying model.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input content.
  repeated Content contents = 4 [(google.api.field_behavior) = REQUIRED];
}

// Response message for [PredictionService.CountTokens][].
message CountTokensResponse {
  // The total number of tokens counted across all instances from the request.
  int32 total_tokens = 1;

  // The total number of billable characters counted across all instances from
  // the request.
  int32 total_billable_characters = 2;
}

// Request message for [PredictionService.GenerateContent].
message GenerateContentRequest {
  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn
  // queries, this is a repeated field that contains conversation history +
  // latest request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The user provided system instructions for the model.
  // Note: only text should be used in parts and content in each part will be
  // in a separate paragraph.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model.
  repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Per request settings for blocking unsafe content.
  // Enforced on GenerateContentResponse.candidates.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Generation config.
  GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for [PredictionService.GenerateContent].
message GenerateContentResponse {
  // Content filter results for a prompt sent in the request.
  message PromptFeedback {
    // Blocked reason enumeration.
    enum BlockedReason {
      // Unspecified blocked reason.
      BLOCKED_REASON_UNSPECIFIED = 0;

      // Candidates blocked due to safety.
      SAFETY = 1;

      // Candidates blocked due to other reason.
      OTHER = 2;

      // Candidates blocked due to the terms which are included from the
      // terminology blocklist.
      BLOCKLIST = 3;

      // Candidates blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;
    }

    // Output only. Blocked reason.
    BlockedReason block_reason = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Safety ratings.
    repeated SafetyRating safety_ratings = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A readable block reason message.
    string block_reason_message = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Usage metadata about response(s).
  message UsageMetadata {
    // Number of tokens in the request.
    int32 prompt_token_count = 1;

    // Number of tokens in the response(s).
    int32 candidates_token_count = 2;

    // Total token count for the prompt and the response candidates combined.
    int32 total_token_count = 3;
  }

  // Output only. Generated candidates.
  repeated Candidate candidates = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Content filter results for a prompt sent in the request.
  // Note: Sent only in the first stream chunk.
  // Only happens when no candidates were generated due to content violations.
  PromptFeedback prompt_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Usage metadata about the response(s).
  UsageMetadata usage_metadata = 4;
}