// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/httpbody.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/content.proto";
import "google/cloud/aiplatform/v1beta1/explanation.proto";
import "google/cloud/aiplatform/v1beta1/tool.proto";
import "google/cloud/aiplatform/v1beta1/types.proto";
import "google/protobuf/struct.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PredictionServiceProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// A service for online predictions and explanations.
service PredictionService {
  option (google.api.default_host) = "aiplatform.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Perform an online prediction.
  rpc Predict(PredictRequest) returns (PredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:predict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:predict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances,parameters";
  }

  // Perform an online prediction with an arbitrary HTTP payload.
  //
  // The response includes the following HTTP headers:
  //
  // * `X-Vertex-AI-Endpoint-Id`: ID of the
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] that served this
  // prediction.
  //
  // * `X-Vertex-AI-Deployed-Model-Id`: ID of the Endpoint's
  // [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] that served
  // this prediction.
  rpc RawPredict(RawPredictRequest) returns (google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:rawPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:rawPredict"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }

  // Perform a unary online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc DirectPredict(DirectPredictRequest) returns (DirectPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directPredict"
      body: "*"
    };
  }

  // Perform a unary online prediction request to a gRPC model server for
  // custom containers.
  rpc DirectRawPredict(DirectRawPredictRequest)
      returns (DirectRawPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:directRawPredict"
      body: "*"
    };
  }

  // Perform a streaming online prediction request to a gRPC model server for
  // Vertex first-party products and frameworks.
  rpc StreamDirectPredict(stream StreamDirectPredictRequest)
      returns (stream StreamDirectPredictResponse) {}

  // Perform a streaming online prediction request to a gRPC model server for
  // custom containers.
  rpc StreamDirectRawPredict(stream StreamDirectRawPredictRequest)
      returns (stream StreamDirectRawPredictResponse) {}

  // Perform a streaming online prediction request for Vertex first-party
  // products and frameworks.
  rpc StreamingPredict(stream StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {}

  // Perform a server-side streaming online prediction request for Vertex
  // LLM streaming.
  rpc ServerStreamingPredict(StreamingPredictRequest)
      returns (stream StreamingPredictResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:serverStreamingPredict"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:serverStreamingPredict"
        body: "*"
      }
    };
  }

  // Perform a streaming online prediction request through gRPC.
  rpc StreamingRawPredict(stream StreamingRawPredictRequest)
      returns (stream StreamingRawPredictResponse) {}

  // Perform an online explanation.
  //
  // If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is specified, the corresponding DeployedModel must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated. If
  // [deployed_model_id][google.cloud.aiplatform.v1beta1.ExplainRequest.deployed_model_id]
  // is not specified, all DeployedModels must have
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // populated.
  rpc Explain(ExplainRequest) returns (ExplainResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:explain"
      body: "*"
    };
    option (google.api.method_signature) =
        "endpoint,instances,parameters,deployed_model_id";
  }

  // Perform a token counting.
  rpc CountTokens(CountTokensRequest) returns (CountTokensResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}:countTokens"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{endpoint=projects/*/locations/*/publishers/*/models/*}:countTokens"
        body: "*"
      }
    };
    option (google.api.method_signature) = "endpoint,instances";
  }

  // Generate content with multimodal inputs.
  rpc GenerateContent(GenerateContentRequest)
      returns (GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:generateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:generateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Generate content with multimodal inputs with streaming support.
  rpc StreamGenerateContent(GenerateContentRequest)
      returns (stream GenerateContentResponse) {
    option (google.api.http) = {
      post: "/v1beta1/{model=projects/*/locations/*/endpoints/*}:streamGenerateContent"
      body: "*"
      additional_bindings {
        post: "/v1beta1/{model=projects/*/locations/*/publishers/*/models/*}:streamGenerateContent"
        body: "*"
      }
    };
    option (google.api.method_signature) = "model,contents";
  }

  // Exposes an OpenAI-compatible endpoint for chat completions.
  rpc ChatCompletions(ChatCompletionsRequest)
      returns (stream google.api.HttpBody) {
    option (google.api.http) = {
      post: "/v1beta1/{endpoint=projects/*/locations/*/endpoints/*}/chat/completions"
      body: "http_body"
    };
    option (google.api.method_signature) = "endpoint,http_body";
  }
}

// Request message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the prediction call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the prediction call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 3;
}

// Response message for
// [PredictionService.Predict][google.cloud.aiplatform.v1beta1.PredictionService.Predict].
message PredictResponse {
  // The predictions that are the output of the predictions call.
  // The schema of any single prediction may be specified via Endpoint's
  // DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [prediction_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.prediction_schema_uri].
  repeated google.protobuf.Value predictions = 1;

  // ID of the Endpoint's DeployedModel that served this prediction.
  string deployed_model_id = 2;

  // Output only. The resource name of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model = 3 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Model"
    }
  ];

  // Output only. The version ID of the Model which is deployed as the
  // DeployedModel that this prediction hits.
  string model_version_id = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The [display
  // name][google.cloud.aiplatform.v1beta1.Model.display_name] of the Model
  // which is deployed as the DeployedModel that this prediction hits.
  string model_display_name = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Request-level metadata returned by the model. The metadata
  // type will be dependent upon the model implementation.
  google.protobuf.Value metadata = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Request message for
// [PredictionService.RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict].
message RawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input. Supports HTTP headers and arbitrary data payload.
  //
  // A [DeployedModel][google.cloud.aiplatform.v1beta1.DeployedModel] may have
  // an upper limit on the number of instances it supports per request. When
  // this limit is exceeded for an AutoML model, the
  // [RawPredict][google.cloud.aiplatform.v1beta1.PredictionService.RawPredict]
  // method returns an error. When this limit is exceeded for a custom-trained
  // model, the behavior varies depending on the model.
  //
  // You can specify the schema for each instance in the
  // [predict_schemata.instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri]
  // field when you create a [Model][google.cloud.aiplatform.v1beta1.Model].
  // This schema applies when you deploy the `Model` as a `DeployedModel` to an
  // [Endpoint][google.cloud.aiplatform.v1beta1.Endpoint] and use the
  // `RawPredict` method.
  google.api.HttpBody http_body = 2;
}

// Request message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.DirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectPredict].
message DirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.DirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.DirectRawPredict].
message DirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamDirectPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input.
  repeated Tensor inputs = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The parameters that govern the prediction.
  Tensor parameters = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectPredict].
message StreamDirectPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamDirectRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamDirectRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The prediction input.
  bytes input = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Response message for
// [PredictionService.StreamDirectRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamDirectRawPredict].
message StreamDirectRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingPredictRequest.endpoint]
// field and optionally [input][]. The subsequent messages must contain
// [input][].
message StreamingPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // The prediction input.
  repeated Tensor inputs = 2;

  // The parameters that govern the prediction.
  Tensor parameters = 3;
}

// Response message for
// [PredictionService.StreamingPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingPredict].
message StreamingPredictResponse {
  // The prediction output.
  repeated Tensor outputs = 1;

  // The parameters that govern the prediction.
  Tensor parameters = 2;
}

// Request message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
//
// The first message must contain
// [endpoint][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.endpoint]
// and
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// fields and optionally
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// The subsequent messages must contain
// [input][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.input].
// [method_name][google.cloud.aiplatform.v1beta1.StreamingRawPredictRequest.method_name]
// in the subsequent messages have no effect.
message StreamingRawPredictRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Fully qualified name of the API method being invoked to perform
  // predictions.
  //
  // Format:
  // `/namespace.Service/Method/`
  // Example:
  // `/tensorflow.serving.PredictionService/Predict`
  string method_name = 2;

  // The prediction input.
  bytes input = 3;
}

// Response message for
// [PredictionService.StreamingRawPredict][google.cloud.aiplatform.v1beta1.PredictionService.StreamingRawPredict].
message StreamingRawPredictResponse {
  // The prediction output.
  bytes output = 1;
}

// Request message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainRequest {
  // Required. The name of the Endpoint requested to serve the explanation.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The instances that are the input to the explanation call.
  // A DeployedModel may have an upper limit on the number of instances it
  // supports per request, and when it is exceeded the explanation call errors
  // in case of AutoML Models, or, in case of customer created Models, the
  // behaviour is as documented by that Model.
  // The schema of any single instance may be specified via Endpoint's
  // DeployedModels'
  // [Model's][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [instance_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.instance_schema_uri].
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // The parameters that govern the prediction. The schema of the parameters may
  // be specified via Endpoint's DeployedModels' [Model's
  // ][google.cloud.aiplatform.v1beta1.DeployedModel.model]
  // [PredictSchemata's][google.cloud.aiplatform.v1beta1.Model.predict_schemata]
  // [parameters_schema_uri][google.cloud.aiplatform.v1beta1.PredictSchemata.parameters_schema_uri].
  google.protobuf.Value parameters = 4;

  // If specified, overrides the
  // [explanation_spec][google.cloud.aiplatform.v1beta1.DeployedModel.explanation_spec]
  // of the DeployedModel. Can be used for explaining prediction results with
  // different configurations, such as:
  //  - Explaining top-5 predictions results as opposed to top-1;
  //  - Increasing path count or step count of the attribution methods to reduce
  //    approximate errors;
  //  - Using different baselines for explaining the prediction results.
  ExplanationSpecOverride explanation_spec_override = 5;

  // Optional. This field is the same as the one above, but supports multiple
  // explanations to occur in parallel. The key can be any string. Each override
  // will be run against the model, then its explanations will be grouped
  // together.
  //
  // Note - these explanations are run **In Addition** to the default
  // Explanation in the deployed model.
  map<string, ExplanationSpecOverride> concurrent_explanation_spec_override = 6
      [(google.api.field_behavior) = OPTIONAL];

  // If specified, this ExplainRequest will be served by the chosen
  // DeployedModel, overriding
  // [Endpoint.traffic_split][google.cloud.aiplatform.v1beta1.Endpoint.traffic_split].
  string deployed_model_id = 3;
}

// Response message for
// [PredictionService.Explain][google.cloud.aiplatform.v1beta1.PredictionService.Explain].
message ExplainResponse {
  // This message is a wrapper grouping Concurrent Explanations.
  message ConcurrentExplanation {
    // The explanations of the Model's
    // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
    //
    // It has the same number of elements as
    // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to
    // be explained.
    repeated Explanation explanations = 1;
  }

  // The explanations of the Model's
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  //
  // It has the same number of elements as
  // [instances][google.cloud.aiplatform.v1beta1.ExplainRequest.instances] to be
  // explained.
  repeated Explanation explanations = 1;

  // This field stores the results of the explanations run in parallel with
  // the default explanation strategy/method.
  map<string, ConcurrentExplanation> concurrent_explanations = 4;

  // ID of the Endpoint's DeployedModel that served this explanation.
  string deployed_model_id = 2;

  // The predictions that are the output of the predictions call.
  // Same as
  // [PredictResponse.predictions][google.cloud.aiplatform.v1beta1.PredictResponse.predictions].
  repeated google.protobuf.Value predictions = 3;
}

// Request message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensRequest {
  // Required. The name of the Endpoint requested to perform token counting.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/{endpoint}`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The instances that are the input to token counting call.
  // Schema is identical to the prediction schema of the underlying model.
  repeated google.protobuf.Value instances = 2
      [(google.api.field_behavior) = REQUIRED];

  // Required. Input content.
  repeated Content contents = 4 [(google.api.field_behavior) = REQUIRED];
}

// Response message for
// [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
message CountTokensResponse {
  // The total number of tokens counted across all instances from the request.
  int32 total_tokens = 1;

  // The total number of billable characters counted across all instances from
  // the request.
  int32 total_billable_characters = 2;
}

// Request message for [PredictionService.GenerateContent].
message GenerateContentRequest {
  // Required. The name of the publisher model requested to serve the
  // prediction. Format:
  // `projects/{project}/locations/{location}/publishers/*/models/*`
  string model = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The content of the current conversation with the model.
  //
  // For single-turn queries, this is a single instance. For multi-turn queries,
  // this is a repeated field that contains conversation history + latest
  // request.
  repeated Content contents = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The user provided system instructions for the model.
  // Note: only text should be used in parts and content in each part will be in
  // a separate paragraph.
  optional Content system_instruction = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A list of `Tools` the model may use to generate the next
  // response.
  //
  // A `Tool` is a piece of code that enables the system to interact with
  // external systems to perform an action, or set of actions, outside of
  // knowledge and scope of the model.
  repeated Tool tools = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Tool config. This config is shared for all tools provided in the
  // request.
  ToolConfig tool_config = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Per request settings for blocking unsafe content.
  // Enforced on GenerateContentResponse.candidates.
  repeated SafetySetting safety_settings = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Generation config.
  GenerationConfig generation_config = 4
      [(google.api.field_behavior) = OPTIONAL];
}

// Response message for [PredictionService.GenerateContent].
message GenerateContentResponse {
  // Content filter results for a prompt sent in the request.
  message PromptFeedback {
    // Blocked reason enumeration.
    enum BlockedReason {
      // Unspecified blocked reason.
      BLOCKED_REASON_UNSPECIFIED = 0;

      // Candidates blocked due to safety.
      SAFETY = 1;

      // Candidates blocked due to other reason.
      OTHER = 2;

      // Candidates blocked due to the terms which are included from the
      // terminology blocklist.
      BLOCKLIST = 3;

      // Candidates blocked due to prohibited content.
      PROHIBITED_CONTENT = 4;
    }

    // Output only. Blocked reason.
    BlockedReason block_reason = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Safety ratings.
    repeated SafetyRating safety_ratings = 2
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. A readable block reason message.
    string block_reason_message = 3 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Usage metadata about response(s).
  message UsageMetadata {
    // Number of tokens in the request.
    int32 prompt_token_count = 1;

    // Number of tokens in the response(s).
    int32 candidates_token_count = 2;

    // Total token count (prompt plus response candidates).
    int32 total_token_count = 3;
  }

  // Output only. Generated candidates.
  repeated Candidate candidates = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Content filter results for a prompt sent in the request.
  // Note: Sent only in the first stream chunk.
  // Only happens when no candidates were generated due to content violations.
  PromptFeedback prompt_feedback = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Usage metadata about the response(s).
  UsageMetadata usage_metadata = 4;
}

// Request message for [PredictionService.ChatCompletions]
message ChatCompletionsRequest {
  // Required. The name of the Endpoint requested to serve the prediction.
  // Format:
  // `projects/{project}/locations/{location}/endpoints/openapi`
  string endpoint = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/Endpoint"
    }
  ];

  // Optional. The prediction input. Supports HTTP headers and arbitrary data
  // payload.
  google.api.HttpBody http_body = 2 [(google.api.field_behavior) = OPTIONAL];
}