// xref: /aosp_15_r20/external/googleapis/google/cloud/aiplatform/v1beta1/persistent_resource.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.aiplatform.v1beta1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/aiplatform/v1beta1/encryption_spec.proto";
import "google/cloud/aiplatform/v1beta1/machine_resources.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

// Per-language code generation options.
option csharp_namespace = "Google.Cloud.AIPlatform.V1Beta1";
option go_package = "cloud.google.com/go/aiplatform/apiv1beta1/aiplatformpb;aiplatformpb";
option java_multiple_files = true;
option java_outer_classname = "PersistentResourceProto";
option java_package = "com.google.cloud.aiplatform.v1beta1";
option php_namespace = "Google\\Cloud\\AIPlatform\\V1beta1";
option ruby_package = "Google::Cloud::AIPlatform::V1beta1";

// Represents long-lasting resources that are dedicated to users to run custom
// workloads.
// A PersistentResource can have multiple node pools and each node
// pool can have its own machine spec.
message PersistentResource {
  option (google.api.resource) = {
    type: "aiplatform.googleapis.com/PersistentResource"
    pattern: "projects/{project}/locations/{location}/persistentResources/{persistent_resource}"
  };

  // Describes the PersistentResource state.
  enum State {
    // Not set.
    STATE_UNSPECIFIED = 0;

    // The PROVISIONING state indicates the persistent resource is being
    // created.
    PROVISIONING = 1;

    // NOTE(review): value 2 is not defined in this enum. Presumably retired or
    // not public -- confirm before assigning it to a new state.

    // The RUNNING state indicates the persistent resource is healthy and fully
    // usable.
    RUNNING = 3;

    // The STOPPING state indicates the persistent resource is being deleted.
    STOPPING = 4;

    // The ERROR state indicates the persistent resource may be unusable.
    // Details can be found in the `error` field.
    ERROR = 5;

    // The REBOOTING state indicates the persistent resource is being rebooted
    // (PR is not available right now but is expected to be ready again later).
    REBOOTING = 6;

    // The UPDATING state indicates the persistent resource is being updated.
    UPDATING = 7;
  }

  // Immutable. Resource name of a PersistentResource.
  string name = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Optional. The display name of the PersistentResource.
  // The name can be up to 128 characters long and can consist of any UTF-8
  // characters.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field number 3 is not defined in this message. Presumably
  // retired or not public -- confirm before reusing it.

  // Required. The spec of the pools of different resources.
  repeated ResourcePool resource_pools = 4
      [(google.api.field_behavior) = REQUIRED];

  // Output only. The detailed state of the PersistentResource.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Only populated when persistent resource's state is `STOPPING`
  // or `ERROR`.
  google.rpc.Status error = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the PersistentResource was created.
  google.protobuf.Timestamp create_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the PersistentResource for the first time entered
  // the `RUNNING` state.
  google.protobuf.Timestamp start_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time when the PersistentResource was most recently updated.
  google.protobuf.Timestamp update_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels with user-defined metadata to organize
  // PersistentResource.
  //
  // Label keys and values can be no longer than 64 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  //
  // See https://goo.gl/xmQnxf for more information and examples of labels.
  map<string, string> labels = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The full name of the Compute Engine
  // [network](/compute/docs/networks-and-firewalls#networks) to be peered with
  // Vertex AI to host the persistent resources.
  // For example, `projects/12345/global/networks/myVPC`.
  // [Format](/compute/docs/reference/rest/v1/networks/insert)
  // is of the form `projects/{project}/global/networks/{network}`.
  // Where {project} is a project number, as in `12345`, and {network} is a
  // network name.
  //
  // To specify this field, you must have already [configured VPC Network
  // Peering for Vertex
  // AI](https://cloud.google.com/vertex-ai/docs/general/vpc-peering).
  //
  // If this field is left unspecified, the resources aren't peered with any
  // network.
  string network = 11 [
    (google.api.field_behavior) = OPTIONAL,
    (google.api.resource_reference) = { type: "compute.googleapis.com/Network" }
  ];

  // Optional. Customer-managed encryption key spec for a PersistentResource.
  // If set, this PersistentResource and all sub-resources of this
  // PersistentResource will be secured by this key.
  EncryptionSpec encryption_spec = 12 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Persistent Resource runtime spec.
  // For example, used for Ray cluster configuration.
  ResourceRuntimeSpec resource_runtime_spec = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. Runtime information of the Persistent Resource.
  ResourceRuntime resource_runtime = 14
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. A list of names for the reserved IP ranges under the VPC network
  // that can be used for this persistent resource.
  //
  // If set, we will deploy the persistent resource within the provided IP
  // ranges. Otherwise, the persistent resource is deployed to any IP
  // ranges under the provided VPC network.
  //
  // Example: ['vertex-ai-ip-range'].
  repeated string reserved_ip_ranges = 15
      [(google.api.field_behavior) = OPTIONAL];
}

// Represents the spec of a group of resources of the same type,
// for example machine type, disk, and accelerators, in a PersistentResource.
message ResourcePool {
  // The min/max number of replicas allowed if enabling autoscaling.
  message AutoscalingSpec {
    // Optional. Minimum number of replicas in the node pool.
    // Must be ≤ `replica_count` and < `max_replica_count`; otherwise an error
    // is thrown.
    optional int64 min_replica_count = 1
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Maximum number of replicas in the node pool.
    // Must be ≥ `replica_count` and > `min_replica_count`; otherwise an error
    // is thrown.
    optional int64 max_replica_count = 2
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Immutable. The unique ID in a PersistentResource for referring to this
  // resource pool. User can specify it if necessary. Otherwise, it's generated
  // automatically.
  string id = 1 [(google.api.field_behavior) = IMMUTABLE];

  // Required. Immutable. The specification of a single machine.
  MachineSpec machine_spec = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The total number of machines to use for this resource pool.
  optional int64 replica_count = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Disk spec for the machine in this node pool.
  DiskSpec disk_spec = 4 [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field number 5 is not defined in this message. Presumably it
  // belonged to the `idle_replica_count` field mentioned below -- confirm
  // before reusing it.

  // Output only. The number of machines currently in use by training jobs for
  // this resource pool. Will replace idle_replica_count.
  int64 used_replica_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Spec to configure GKE autoscaling.
  AutoscalingSpec autoscaling_spec = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for the runtime on a PersistentResource instance, including
// but not limited to:
//
// * Service accounts used to run the workloads.
// * Whether to make it a dedicated Ray Cluster.
message ResourceRuntimeSpec {
  // Optional. Configure the use of workload identity on the
  // PersistentResource.
  ServiceAccountSpec service_account_spec = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Ray cluster configuration.
  // Required when creating a dedicated RayCluster on the PersistentResource.
  RaySpec ray_spec = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration information for the Ray cluster.
// For experimental launch, Ray cluster creation and Persistent
// cluster creation are 1:1 mapping: We will provision all the nodes within the
// Persistent cluster as Ray nodes.
message RaySpec {
  // Optional. Default image for user to choose a preferred ML framework
  // (for example, TensorFlow or PyTorch) by choosing from [Vertex prebuilt
  // images](https://cloud.google.com/vertex-ai/docs/training/pre-built-containers).
  // Either this or the resource_pool_images is required. Use this field if
  // you need all the resource pools to have the same Ray image. Otherwise, use
  // the `resource_pool_images` field.
  string image_uri = 1 [(google.api.field_behavior) = OPTIONAL];

  // NOTE(review): field numbers 2 through 5 are not defined in this message.
  // Presumably retired or not public -- confirm before reusing them.

  // Optional. Required if image_uri isn't set. A map of resource_pool_id to
  // prebuilt Ray image if the user needs to use different images for different
  // head/worker pools. This map needs to cover all the resource pool ids.
  // Example:
  // {
  //   "ray_head_node_pool": "head image"
  //   "ray_worker_node_pool1": "worker image"
  //   "ray_worker_node_pool2": "another worker image"
  // }
  map<string, string> resource_pool_images = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. This will be used to indicate which resource pool will serve as
  // the Ray head node (the first node within that pool). Will use the machine
  // from the first worker pool as the head node by default if this field
  // isn't set.
  string head_node_resource_pool_id = 7
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Ray metrics configurations.
  RayMetricSpec ray_metric_spec = 8 [(google.api.field_behavior) = OPTIONAL];
}

// Persistent Cluster runtime information as output.
message ResourceRuntime {
  // Output only. URIs for user to connect to the Cluster.
  // Example:
  // {
  //   "RAY_HEAD_NODE_INTERNAL_IP": "head-node-IP:10001"
  //   "RAY_DASHBOARD_URI": "ray-dashboard-address:8888"
  // }
  map<string, string> access_uris = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The resource name of NotebookRuntimeTemplate for the RoV
  // Persistent Cluster. The NotebookRuntimeTemplate is created in the same VPC
  // (if set), and with the same Ray and Python version as the Persistent
  // Cluster. Example:
  //   "projects/1000/locations/us-central1/notebookRuntimeTemplates/abc123"
  string notebook_runtime_template = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "aiplatform.googleapis.com/NotebookRuntimeTemplate"
    }
  ];
}

// Configuration for the use of custom service account to run the workloads.
message ServiceAccountSpec {
  // Required. If true, custom user-managed service account is enforced to run
  // any workloads (for example, Vertex Jobs) on the resource. Otherwise, uses
  // the [Vertex AI Custom Code Service
  // Agent](https://cloud.google.com/vertex-ai/docs/general/access-control#service-agents).
  bool enable_custom_service_account = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Required when all of the conditions below are met:
  //  * `enable_custom_service_account` is true;
  //  * any runtime is specified via `ResourceRuntimeSpec` on creation time,
  //    for example, Ray.
  //
  // The users must have `iam.serviceAccounts.actAs` permission on this service
  // account and then the specified runtime containers will run as it.
  //
  // Do not set this field if you want to submit jobs using custom service
  // account to this PersistentResource after creation, but only specify the
  // `service_account` inside the job.
  string service_account = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration for the Ray metrics.
message RayMetricSpec {
  // Optional. Flag to disable the Ray metrics collection.
  // As a proto3 `bool` without explicit presence, leaving it unset is
  // equivalent to `false`.
  bool disabled = 1 [(google.api.field_behavior) = OPTIONAL];
}
