// aiplatform/v1/machine_resources.proto

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1;

import "google/api/field_behavior.proto";
import "google/cloud/aiplatform/v1/accelerator_type.proto";

// Language-specific code generation options: these set the namespace or
// package names used by the generated C#, Go, Java, PHP, and Ruby code.
option csharp_namespace = "Google.Cloud.AIPlatform.V1";
option go_package = "cloud.google.com/go/aiplatform/apiv1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "MachineResourcesProto";
option java_package = "com.google.cloud.aiplatform.v1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1";
option ruby_package = "Google::Cloud::AIPlatform::V1";

// Specification of a single machine.
message MachineSpec {
  // Immutable. The type of the machine.
  //
  // See the [list of machine types supported for
  // prediction](https://cloud.google.com/vertex-ai/docs/predictions/configure-compute#machine-types).
  //
  // See the [list of machine types supported for custom
  // training](https://cloud.google.com/vertex-ai/docs/training/configure-compute#machine-types).
  //
  // For [DeployedModel][google.cloud.aiplatform.v1.DeployedModel] this field is
  // optional, and the default value is `n1-standard-2`. For
  // [BatchPredictionJob][google.cloud.aiplatform.v1.BatchPredictionJob] or as
  // part of [WorkerPoolSpec][google.cloud.aiplatform.v1.WorkerPoolSpec] this
  // field is required.
  string machine_type = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The type of accelerator(s) that may be attached to the machine
  // as per
  // [accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count].
  AcceleratorType accelerator_type = 2
      [(google.api.field_behavior) = IMMUTABLE];

  // The number of accelerators to attach to the machine.
  //
  // When used in
  // [DedicatedResources][google.cloud.aiplatform.v1.DedicatedResources], a
  // value above 0 makes autoscaling consider the accelerator's duty cycle in
  // addition to CPU utilization; with a value of 0, autoscaling is based on
  // CPU utilization only.
  int32 accelerator_count = 3;

  // Immutable. The topology of the TPUs. Corresponds to the TPU topologies
  // available from GKE. (Example: tpu_topology: "2x2x1").
  string tpu_topology = 4 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are dedicated to a DeployedModel, and
// that need a higher degree of manual configuration.
message DedicatedResources {
  // Required. Immutable. The specification of a single machine used by the
  // prediction.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The minimum number of machine replicas this
  // DeployedModel will always be deployed on. This value must be greater than
  // or equal to 1.
  //
  // If traffic against the DeployedModel increases, it may dynamically be
  // deployed onto more replicas, and as traffic decreases, some of these extra
  // replicas may be freed.
  int32 min_replica_count = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided,
  // [min_replica_count][google.cloud.aiplatform.v1.DedicatedResources.min_replica_count]
  // is used as the default value.
  //
  // The value of this field impacts the charge against Vertex CPU and GPU
  // quotas. Specifically, you will be charged for (max_replica_count *
  // number of cores in the selected machine type) and (max_replica_count *
  // number of GPUs per replica in the selected machine type).
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The metric specifications that override the target value of a
  // resource utilization metric (CPU utilization, accelerator's duty cycle,
  // and so on). The target value defaults to 60 if not set. At most one entry
  // is allowed per metric.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is above 0, the autoscaling will be based on both CPU utilization and
  // accelerator's duty cycle metrics: it scales up when either metric exceeds
  // its target value and scales down when both metrics are under their target
  // values. The default target value is 60 for both metrics.
  //
  // If
  // [machine_spec.accelerator_count][google.cloud.aiplatform.v1.MachineSpec.accelerator_count]
  // is 0, the autoscaling will be based on CPU utilization metric only with
  // default target value 60 if not explicitly set.
  //
  // For example, in the case of Online Prediction, if you want to override
  // target CPU utilization to 80, you should set
  // [autoscaling_metric_specs.metric_name][google.cloud.aiplatform.v1.AutoscalingMetricSpec.metric_name]
  // to `aiplatform.googleapis.com/prediction/online/cpu/utilization` and
  // [autoscaling_metric_specs.target][google.cloud.aiplatform.v1.AutoscalingMetricSpec.target]
  // to `80`.
  repeated AutoscalingMetricSpec autoscaling_metric_specs = 4
      [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that to a large degree are decided by Vertex AI,
// and require only a modest additional configuration.
// Each Model supporting these resources documents its specific guidelines.
message AutomaticResources {
  // Immutable. The minimum number of replicas this DeployedModel will always be
  // deployed on. If traffic against it increases, it may dynamically be
  // deployed onto more replicas up to
  // [max_replica_count][google.cloud.aiplatform.v1.AutomaticResources.max_replica_count],
  // and as traffic decreases, some of these extra replicas may be freed. If the
  // requested value is too large, the deployment will error.
  int32 min_replica_count = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of replicas this DeployedModel may be
  // deployed on when the traffic against it increases. If the requested value
  // is too large, the deployment will error, but if deployment succeeds then
  // the ability to scale the model to that many replicas is guaranteed (barring
  // service outages). If traffic against the DeployedModel increases beyond
  // what its replicas at maximum may handle, a portion of the traffic will be
  // dropped. If this value is not provided, no upper bound for scaling under
  // heavy traffic will be assumed, though Vertex AI may be unable to scale
  // beyond a certain replica number.
  int32 max_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];
}

// A description of resources that are used for performing batch operations, are
// dedicated to a Model, and need manual configuration.
message BatchDedicatedResources {
  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Immutable. The number of machine replicas used at the start of the batch
  // operation. If not set, Vertex AI decides the starting number, which is not
  // greater than
  // [max_replica_count][google.cloud.aiplatform.v1.BatchDedicatedResources.max_replica_count].
  int32 starting_replica_count = 2 [(google.api.field_behavior) = IMMUTABLE];

  // Immutable. The maximum number of machine replicas the batch operation may
  // be scaled to. The default value is 10.
  int32 max_replica_count = 3 [(google.api.field_behavior) = IMMUTABLE];
}

// Statistics information about resource consumption.
message ResourcesConsumed {
  // Output only. The number of replica hours used. Note that many replicas may
  // run in parallel, and additionally any given work may be queued for some
  // time. Therefore, this value is not strictly related to wall time.
  double replica_hours = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Represents the spec of boot disk options.
message DiskSpec {
  // Type of the boot disk (default is "pd-ssd").
  // Valid values: "pd-ssd" (Persistent Disk Solid State Drive) or
  // "pd-standard" (Persistent Disk Hard Disk Drive).
  string boot_disk_type = 1;

  // Size in GB of the boot disk (default is 100GB).
  int32 boot_disk_size_gb = 2;
}

// Represents the spec of [persistent
// disk](https://cloud.google.com/compute/docs/disks/persistent-disks) options.
message PersistentDiskSpec {
  // Type of the disk (default is "pd-standard").
  // Valid values:
  //
  // * "pd-ssd" (Persistent Disk Solid State Drive)
  // * "pd-standard" (Persistent Disk Hard Disk Drive)
  // * "pd-balanced" (Balanced Persistent Disk)
  // * "pd-extreme" (Extreme Persistent Disk)
  string disk_type = 1;

  // Size in GB of the disk (default is 100GB).
  int64 disk_size_gb = 2;
}

// Represents a mount configuration for a Network File System (NFS) to mount.
message NfsMount {
  // Required. IP address of the NFS server.
  string server = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. Source path exported from the NFS server.
  // Has to start with '/', and combined with the IP address, it indicates
  // the source mount path in the form of `server:path`.
  string path = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. Destination mount path. The NFS will be mounted for the user
  // under `/mnt/nfs/<mount_point>`.
  string mount_point = 3 [(google.api.field_behavior) = REQUIRED];
}

// The metric specification that defines the target resource utilization
// (CPU utilization, accelerator's duty cycle, and so on) for calculating the
// desired replica count.
message AutoscalingMetricSpec {
  // Required. The resource metric name.
  // Supported metrics:
  //
  // * For Online Prediction:
  //   * `aiplatform.googleapis.com/prediction/online/accelerator/duty_cycle`
  //   * `aiplatform.googleapis.com/prediction/online/cpu/utilization`
  string metric_name = 1 [(google.api.field_behavior) = REQUIRED];

  // The target resource utilization in percentage (1% - 100%) for the given
  // metric; once the real usage deviates from the target by a certain
  // percentage, the machine replicas change. The default value is 60
  // (representing 60%) if not provided.
  int32 target = 2;
}

// A set of Shielded Instance options.
// See [Images using supported Shielded VM
// features](https://cloud.google.com/compute/docs/instances/modifying-shielded-vm).
message ShieldedVmConfig {
  // Defines whether the instance has [Secure
  // Boot](https://cloud.google.com/compute/shielded-vm/docs/shielded-vm#secure-boot)
  // enabled.
  //
  // Secure Boot helps ensure that the system only runs authentic software by
  // verifying the digital signature of all boot components, and halting the
  // boot process if signature verification fails.
  //
  // As a proto3 `bool`, this field reads as `false` when it is not set.
  bool enable_secure_boot = 1;
}