// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
import "google/cloud/aiplatform/v1beta1/machine_resources.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PersistentResourceProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// Represents long-lasting resources that are dedicated to users to run custom
// workloads.
// A PersistentResource can have multiple node pools and each node
// pool can have its own machine spec.
message PersistentResource {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/PersistentResource"
    pattern: "projects/{project}/locations/{location}/persistentResources/{persistent_resource}"
  };

  // Describes the PersistentResource state.
  enum State {
    // Not set.
    STATE_UNSPECIFIED = 0;

    // The PROVISIONING state indicates the persistent resource is being
    // created.
    PROVISIONING = 1;

    // The RUNNING state indicates the persistent resource is healthy and fully
    // usable.
    RUNNING = 3;

    // The STOPPING state indicates the persistent resource is being deleted.
    STOPPING = 4;

    // The ERROR state indicates the persistent resource may be unusable.
    // Details can be found in the `error` field.
    ERROR = 5;

    // The REBOOTING state indicates the persistent resource is being rebooted
    // (PR is not available right now but is expected to be ready again later).
    REBOOTING = 6;

    // The UPDATING state indicates the persistent resource is being updated.
    UPDATING = 7;
  }

  // Immutable. Resource name of a PersistentResource.
  string name = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Optional. The display name of the PersistentResource.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Required. The spec of the pools of different resources.
  repeated ResourcePool resource_pools = 4
      [(google.api.field_behavior) = REQUIRED];

  // Output only. The detailed state of the PersistentResource.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when persistent resource's state is `STOPPING`
  // or `ERROR`.
  google.rpc.Status error = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the PersistentResource was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the PersistentResource for the first time entered
  // the `RUNNING` state.
  google.protobuf.Timestamp start_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the PersistentResource was most recently updated.
  google.protobuf.Timestamp update_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels with user-defined metadata to organize
  // PersistentResource.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The full name of the Compute Engine
  // [network](/compute/docs/networks-and-firewalls#networks) to be peered with
  // Vertex AI to host the persistent resources.
  // For example, `projects/12345/global/networks/myVPC`.
  // [Format](/compute/docs/reference/rest/v1/networks/insert)
  // is of the form `projects/{project}/global/networks/{network}`.
  // Where {project} is a project number, as in `12345`, and {network} is a
  // network name.
  //
  // To specify this field, you must have already [configured VPC Network
  // Peering for Vertex
  // AI](https://cloud.google.com/vertex-ai/docs/general/vpc-peering).
  //
  // If this field is left unspecified, the resources aren't peered with any
  // network.
  string network = 11 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = { type: "compute.googleapis.com/Network" }
  ];

  // Optional. Customer-managed encryption key spec for a PersistentResource.
  // If set, this PersistentResource and all sub-resources of this
  // PersistentResource will be secured by this key.
  EncryptionSpec encryption_spec = 12 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Persistent Resource runtime spec.
  // For example, used for Ray cluster configuration.
  ResourceRuntimeSpec resource_runtime_spec = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. Runtime information of the Persistent Resource.
  ResourceRuntime resource_runtime = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. A list of names for the reserved IP ranges under the VPC network
  // that can be used for this persistent resource.
  //
  // If set, we will deploy the persistent resource within the provided IP
  // ranges. Otherwise, the persistent resource is deployed to any IP
  // ranges under the provided VPC network.
  //
  // Example: ['vertex-ai-ip-range'].
  repeated string reserved_ip_ranges = 15
      [(google.api.field_behavior) = OPTIONAL];
}

// Represents the spec of a group of resources of the same type,
// for example machine type, disk, and accelerators, in a PersistentResource.
message ResourcePool {
  // The min/max number of replicas allowed if enabling autoscaling.
  message AutoscalingSpec {
    // Optional. Min replicas in the node pool;
    // must be ≤ replica_count and < max_replica_count or will throw error.
    optional int64 min_replica_count = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Max replicas in the node pool;
    // must be ≥ replica_count and > min_replica_count or will throw error.
    optional int64 max_replica_count = 2
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Immutable. The unique ID in a PersistentResource for referring to this
  // resource pool. User can specify it if necessary. Otherwise, it's generated
  // automatically.
  string id = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The total number of machines to use for this resource pool.
  optional int64 replica_count = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Disk spec for the machine in this node pool.
  DiskSpec disk_spec = 4 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The number of machines currently in use by training jobs for
  // this resource pool. Will replace idle_replica_count.
  int64 used_replica_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Spec to configure GKE autoscaling.
  AutoscalingSpec autoscaling_spec = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for the runtime on a PersistentResource instance, including
// but not limited to:
//
// * Service accounts used to run the workloads.
// * Whether to make it a dedicated Ray Cluster.
message ResourceRuntimeSpec {
  // Optional. Configure the use of workload identity on the
  // PersistentResource.
  ServiceAccountSpec service_account_spec = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Ray cluster configuration.
  // Required when creating a dedicated RayCluster on the PersistentResource.
  RaySpec ray_spec = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration information for the Ray cluster.
// For experimental launch, Ray cluster creation and Persistent
// cluster creation are 1:1 mapping: We will provision all the nodes within the
// Persistent cluster as Ray nodes.
message RaySpec {
  // Optional. Default image for user to choose a preferred ML framework
  // (for example, TensorFlow or Pytorch) by choosing from [Vertex prebuilt
  // images](https://cloud.google.com/vertex-ai/docs/training/pre-built-containers).
  // Either this or the resource_pool_images is required. Use this field if
  // you need all the resource pools to have the same Ray image. Otherwise, use
  // the `resource_pool_images` field.
  string image_uri = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Required if image_uri isn't set. A map of resource_pool_id to
  // prebuilt Ray image if the user needs to use different images for different
  // head/worker pools. This map needs to cover all the resource pool ids.
  // Example:
  //   {
  //     "ray_head_node_pool": "head image"
  //     "ray_worker_node_pool1": "worker image"
  //     "ray_worker_node_pool2": "another worker image"
  //   }
  map<string, string> resource_pool_images = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. This will be used to indicate which resource pool will serve as
  // the Ray head node (the first node within that pool). Will use the machine
  // from the first workerpool as the head node by default if this field isn't
  // set.
  string head_node_resource_pool_id = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Ray metrics configurations.
  RayMetricSpec ray_metric_spec = 8 [(google.api.field_behavior) = OPTIONAL];
}

// Persistent Cluster runtime information as output.
message ResourceRuntime {
  // Output only. URIs for user to connect to the Cluster.
  // Example:
  //   {
  //     "RAY_HEAD_NODE_INTERNAL_IP": "head-node-IP:10001"
  //     "RAY_DASHBOARD_URI": "ray-dashboard-address:8888"
  //   }
  map<string, string> access_uris = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The resource name of NotebookRuntimeTemplate for the RoV
  // Persistent Cluster. The NotebookRuntimeTemplate is created in the same VPC
  // (if set), and with the same Ray and Python version as the Persistent
  // Cluster. Example:
  //   "projects/1000/locations/us-central1/notebookRuntimeTemplates/abc123"
  string notebook_runtime_template = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/NotebookRuntimeTemplate"
    }
  ];
}

// Configuration for the use of custom service account to run the workloads.
message ServiceAccountSpec {
  // Required. If true, custom user-managed service account is enforced to run
  // any workloads (for example, Vertex Jobs) on the resource. Otherwise, uses
  // the [Vertex AI Custom Code Service
  // Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents).
  bool enable_custom_service_account = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Required when all below conditions are met
  //  * `enable_custom_service_account` is true;
  //  * any runtime is specified via `ResourceRuntimeSpec` on creation time,
  //    for example, Ray
  //
  // The users must have `iam.serviceAccounts.actAs` permission on this service
  // account and then the specified runtime containers will run as it.
  //
  // Do not set this field if you want to submit jobs using custom service
  // account to this PersistentResource after creation, but only specify the
  // `service_account` inside the job.
  string service_account = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for the Ray metrics.
message RayMetricSpec {
  // Optional. Flag to disable the Ray metrics collection.
  bool disabled = 1 [(google.api.field_behavior) = OPTIONAL];
}