// xref: /aosp_15_r20/external/googleapis/google/cloud/aiplatform/v1/machine_resources.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

// Provides the `(google.api.field_behavior)` field annotation used below.
import "google/api/field_behavior.proto";
// Presumably declares `AcceleratorType` (referenced by MachineSpec); the type
// is not defined in this file.
import "google/cloud/aiplatform/v1/accelerator_type.proto";

// Per-language code generation settings.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "MachineResourcesProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Specification of a single machine.
message MachineSpec {
  // Immutable. The type of the machine.
  //
  // See the [list of machine types supported for
  // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
  //
  // See the [list of machine types supported for custom
  // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
  //
  // For [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] this field is
  // optional, and the default value is `n1-standard-2`. For
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] or as
  // part of [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec] this
  // field is required.
  string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The type of accelerator(s) that may be attached to the machine
  // as per
  // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2
      [(google.api.field_behavior) = IMMUTABLE];

  // The number of accelerators to attach to the machine.
  int32 accelerator_count = 3;

  // Immutable. The topology of the TPUs. Corresponds to the TPU topologies
  // available from GKE. For example: `tpu_topology: "2x2x1"`.
  string tpu_topology = 4 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are dedicated to a DeployedModel, and
// that need a higher degree of manual configuration.
message DedicatedResources {
  // Required. Immutable. The specification of a single machine used by the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The minimum number of machine replicas this
  // DeployedModel will always be deployed on. This value must be greater than
  // or equal to 1.
  //
  // If traffic against the DeployedModel increases, it may dynamically be
  // deployed onto more replicas, and as traffic decreases, some of these extra
  // replicas may be freed.
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided,
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
  // is used as the default value.
  //
  // The value of this field impacts the charge against Vertex CPU and GPU
  // quotas. Specifically, you will be charged for (max_replica_count *
  // number of cores in the selected machine type) and (max_replica_count *
  // number of GPUs per replica in the selected machine type).
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The metric specifications that override the target value of a
  // resource utilization metric (CPU utilization, accelerator's duty cycle,
  // and so on). The target defaults to 60 if not set. At most one entry is
  // allowed per metric.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is above 0, the autoscaling will be based on both CPU utilization and
  // accelerator's duty cycle metrics, scaling up when either metric exceeds
  // its target value and scaling down when both metrics are under their target
  // values. The default target value is 60 for both metrics.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is 0, the autoscaling will be based on the CPU utilization metric only,
  // with a default target value of 60 if not explicitly set.
  //
  // For example, in the case of Online Prediction, if you want to override
  // target CPU utilization to 80, you should set
  // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1.AutoscalingMetricSpec.metric_name]
  // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
  // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1.AutoscalingMetricSpec.target]
  // to `80`.
  repeated AutoscalingMetricSpec autoscaling_metric_specs = 4
      [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are, to a large degree, decided by Vertex
// AI, and require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
  // Immutable. The minimum number of replicas this DeployedModel will always
  // be deployed on. If traffic against it increases, it may dynamically be
  // deployed onto more replicas up to
  // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
  // and as traffic decreases, some of these extra replicas may be freed. If the
  // requested value is too large, the deployment will error.
  int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided, no upper bound for scaling under
  // heavy traffic will be assumed, though Vertex AI may be unable to scale
  // beyond a certain replica number.
  int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are used for performing batch operations, are
// dedicated to a Model, and need manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The number of machine replicas used at the start of the batch
  // operation. If not set, Vertex AI decides the starting number, which is not
  // greater than
  // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of machine replicas the batch operation may
  // be scaled to. The default value is 10.
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
}

// Statistics information about resource consumption.
message ResourcesConsumed {
  // Output only. The number of replica hours used. Note that many replicas may
  // run in parallel, and additionally any given work may be queued for some
  // time. Therefore this value is not strictly related to wall time.
  double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Represents the spec of disk options.
message DiskSpec {
  // Type of the boot disk (default is "pd-ssd").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 1;

  // Size in GB of the boot disk (default is 100GB).
  int32 boot_disk_size_gb = 2;
}

// Represents the spec of [persistent
// disk](https://cloud.google.com/compute/docs/disks/persistent-disks) options.
message PersistentDiskSpec {
  // Type of the disk (default is "pd-standard").
  // Valid values:
  //
  // * "pd-ssd" (Persistent Disk Solid State Drive)
  // * "pd-standard" (Persistent Disk Hard Disk Drive)
  // * "pd-balanced" (Balanced Persistent Disk)
  // * "pd-extreme" (Extreme Persistent Disk)
  string disk_type = 1;

  // Size in GB of the disk (default is 100GB).
  int64 disk_size_gb = 2;
}

// Represents a mount configuration for Network File System (NFS) to mount.
message NfsMount {
  // Required. IP address of the NFS server.
  string server = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Source path exported from the NFS server.
  // Has to start with '/', and combined with the IP address, it indicates
  // the source mount path in the form of `server:path`.
  string path = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Destination mount path. The NFS will be mounted for the user
  // under `/mnt/nfs/<mount_point>`.
  string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
}

// The metric specification that defines the target resource utilization
// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
// desired replica count.
message AutoscalingMetricSpec {
  // Required. The resource metric name.
  // Supported metrics:
  //
  // * For Online Prediction:
  //   * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
  //   * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
  string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

  // The target resource utilization in percentage (1% - 100%) for the given
  // metric; once the real usage deviates from the target by a certain
  // percentage, the machine replicas change. The default value is 60
  // (representing 60%) if not provided.
  int32 target = 2;
}

// A set of Shielded Instance options.
// See [Images using supported Shielded VM
// features](https://cloud.google.com/compute/docs/instances/modifying-shielded-vm).
message ShieldedVmConfig {
  // Defines whether the instance has [Secure
  // Boot](https://cloud.google.com/compute/shielded-vm/docs/shielded-vm#secure-boot)
  // enabled.
  //
  // Secure Boot helps ensure that the system only runs authentic software by
  // verifying the digital signature of all boot components, and halting the
  // boot process if signature verification fails.
  bool enable_secure_boot = 1;
}