// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1/accelerator_type.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "MachineResourcesProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Specification of a single machine.
message MachineSpec {
  // Immutable. The type of the machine.
  //
  // See the [list of machine types supported for
  // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
  //
  // See the [list of machine types supported for custom
  // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
  //
  // For [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] this field is
  // optional, and the default value is `n1-standard-2`. For
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] or as
  // part of [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec] this
  // field is required.
  string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The type of accelerator(s) that may be attached to the machine
  // as per
  // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2
      [(google.api.field_behavior) = IMMUTABLE];

  // The number of accelerators to attach to the machine.
  int32 accelerator_count = 3;

  // Immutable. The topology of the TPUs. Corresponds to the TPU topologies
  // available from GKE. (Example: tpu_topology: "2x2x1").
  string tpu_topology = 4 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are dedicated to a DeployedModel, and
// that need a higher degree of manual configuration.
message DedicatedResources {
  // Required. Immutable. The specification of a single machine used by the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The minimum number of machine replicas this
  // DeployedModel will be always deployed on. This value must be greater than
  // or equal to 1.
  //
  // If traffic against the DeployedModel increases, it may dynamically be
  // deployed onto more replicas, and as traffic decreases, some of these extra
  // replicas may be freed.
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided, will use
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
  // as the default value.
  //
  // The value of this field impacts the charge against Vertex CPU and GPU
  // quotas. Specifically, you will be charged for (max_replica_count *
  // number of cores in the selected machine type) and (max_replica_count *
  // number of GPUs per replica in the selected machine type).
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The metric specifications that override a resource
  // utilization metric (CPU utilization, accelerator's duty cycle, and so on)
  // target value (default to 60 if not set). At most one entry is allowed per
  // metric.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is above 0, the autoscaling will be based on both CPU utilization and
  // accelerator's duty cycle metrics and scale up when either metric exceeds
  // its target value while scale down if both metrics are under their target
  // value. The default target value is 60 for both metrics.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is 0, the autoscaling will be based on CPU utilization metric only with
  // default target value 60 if not explicitly set.
  //
  // For example, in the case of Online Prediction, if you want to override
  // target CPU utilization to 80, you should set
  // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1.AutoscalingMetricSpec.metric_name]
  // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
  // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1.AutoscalingMetricSpec.target]
  // to `80`.
  repeated AutoscalingMetricSpec autoscaling_metric_specs = 4
      [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that to a large degree are decided by Vertex AI,
// and require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
  // Immutable. The minimum number of replicas this DeployedModel will be always
  // deployed on. If traffic against it increases, it may dynamically be
  // deployed onto more replicas up to
  // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
  // and as traffic decreases, some of these extra replicas may be freed. If the
  // requested value is too large, the deployment will error.
  int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided, no upper bound for scaling under
  // heavy traffic will be assumed, though Vertex AI may be unable to scale
  // beyond a certain replica number.
  int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are used for performing batch operations, are
// dedicated to a Model, and need manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The number of machine replicas used at the start of the batch
  // operation. If not set, Vertex AI decides starting number, not greater than
  // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of machine replicas the batch operation may
  // be scaled to. The default value is 10.
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
}

// Statistics information about resource consumption.
message ResourcesConsumed {
  // Output only. The number of replica hours used. Note that many replicas may
  // run in parallel, and additionally any given work may be queued for some
  // time. Therefore this value is not strictly related to wall time.
  double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Represents the spec of disk options.
message DiskSpec {
  // Type of the boot disk (default is "pd-ssd").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 1;

  // Size in GB of the boot disk (default is 100GB).
  int32 boot_disk_size_gb = 2;
}

// Represents the spec of [persistent
// disk](https://cloud.google.com/compute/docs/disks/persistent-disks) options.
message PersistentDiskSpec {
  // Type of the disk (default is "pd-standard").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive)
  // "pd-standard" (Persistent Disk Hard Disk Drive)
  // "pd-balanced" (Balanced Persistent Disk)
  // "pd-extreme" (Extreme Persistent Disk)
  string disk_type = 1;

  // Size in GB of the disk (default is 100GB).
  int64 disk_size_gb = 2;
}

// Represents a mount configuration for Network File System (NFS) to mount.
message NfsMount {
  // Required. IP address of the NFS server.
  string server = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Source path exported from NFS server.
  // Has to start with '/', and combined with the ip address, it indicates
  // the source mount path in the form of `server:path`.
  string path = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Destination mount path. The NFS will be mounted for the user
  // under /mnt/nfs/<mount_point>.
  string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
}

// The metric specification that defines the target resource utilization
// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
// desired replica count.
message AutoscalingMetricSpec {
  // Required. The resource metric name.
  // Supported metrics:
  //
  // * For Online Prediction:
  // * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
  // * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
  string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

  // The target resource utilization in percentage (1% - 100%) for the given
  // metric; once the real usage deviates from the target by a certain
  // percentage, the machine replicas change. The default value is 60
  // (representing 60%) if not provided.
  int32 target = 2;
}

// A set of Shielded Instance options.
// See [Images using supported Shielded VM
// features](https://cloud.google.com/compute/docs/instances/modifying-shielded-vm).
message ShieldedVmConfig {
  // Defines whether the instance has [Secure
  // Boot](https://cloud.google.com/compute/shielded-vm/docs/shielded-vm#secure-boot)
  // enabled.
  //
  // Secure Boot helps ensure that the system only runs authentic software by
  // verifying the digital signature of all boot components, and halting the
  // boot process if signature verification fails.
  bool enable_secure_boot = 1;
}