// xref: /aosp_15_r20/external/googleapis/google/cloud/dataproc/v1/clusters.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/operations.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/type/interval.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";

// The ClusterController service provides methods to manage clusters
// of Compute Engine instances.
service ClusterController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc CreateCluster(CreateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
    option (google.api.method_signature) = "project_id,region,cluster";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Updates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // The cluster must be in a
  // [`RUNNING`][google.cloud.dataproc.v1.ClusterStatus.State] state or an error
  // is returned.
  rpc UpdateCluster(UpdateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
    option (google.api.method_signature) =
        "project_id,region,cluster_name,cluster,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Stops a cluster in a project. The returned long-running operation
  // declares `Cluster` as its response type and
  // `google.cloud.dataproc.v1.ClusterOperationMetadata` as its metadata type.
  rpc StopCluster(StopClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:stop"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Starts a cluster in a project. The returned long-running operation
  // declares `Cluster` as its response type and
  // `google.cloud.dataproc.v1.ClusterOperationMetadata` as its metadata type.
  rpc StartCluster(StartClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:start"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Deletes a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc DeleteCluster(DeleteClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
  }

  // Lists all clusters in a region of a project, alphabetically.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters"
    };
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Gets cluster diagnostic information. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // After the operation completes,
  // [Operation.response][google.longrunning.Operation.response]
  // contains
  // [DiagnoseClusterResults](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#diagnoseclusterresults).
  rpc DiagnoseCluster(DiagnoseClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "DiagnoseClusterResults"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }
}

// Describes the identifying information, config, and status of
// a Dataproc cluster.
message Cluster {
  // Required. The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name, which must be unique within a project.
  // The name must start with a lowercase letter, and can contain
  // up to 51 lowercase letters, numbers, and hyphens. It cannot end
  // with a hyphen. The name of a deleted cluster can be reused.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The cluster config for a cluster of Compute Engine Instances.
  // Note that Dataproc may set default values, and values may change
  // when clusters are updated.
  //
  // Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  ClusterConfig config = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The virtual cluster config is used when creating a Dataproc
  // cluster that does not directly control the underlying compute resources,
  // for example, when creating a [Dataproc-on-GKE
  // cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
  // Dataproc may set default values, and values may change when
  // clusters are updated. Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  VirtualClusterConfig virtual_cluster_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels to associate with this cluster.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a cluster.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Cluster status.
  ClusterStatus status = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous cluster statuses.
  repeated ClusterStatus status_history = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A cluster UUID (Unique Universal Identifier). Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Contains cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It
  // may be changed before final release.
  ClusterMetrics metrics = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The cluster config.
message ClusterConfig {
  // Optional. A Cloud Storage bucket used to stage job
  // dependencies, config files, and job driver console output.
  // If you do not specify a staging bucket, Dataproc will determine a
  // Cloud Storage location (US, ASIA, or EU) for your cluster's staging
  // bucket according to the Compute Engine zone where your cluster is
  // deployed, and then create and manage this project-level, per-location
  // bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs
  // data, such as Spark and MapReduce history files. If you do not specify a
  // temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or
  // EU) for your cluster's temp bucket according to the Compute Engine zone
  // where your cluster is deployed, and then create and manage this
  // project-level, per-location bucket. The default bucket has a TTL of 90
  // days, but you can use any TTL (or none) if you specify a bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The shared Compute Engine config settings for
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // the cluster's master instance.
  InstanceGroupConfig master_config = 9
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // the cluster's worker instances.
  InstanceGroupConfig worker_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // a cluster's secondary worker instances.
  InstanceGroupConfig secondary_worker_config = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The config settings for cluster software.
  SoftwareConfig software_config = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker nodes.
  // You can test a node's `role` metadata to run an executable on
  // a master or worker node, as shown below using `curl` (you can also use
  // `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google
  //     http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Encryption settings for the cluster.
  EncryptionConfig encryption_config = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Autoscaling config for the policy associated with the cluster.
  // Cluster does not autoscale if this field is unset.
  AutoscalingConfig autoscaling_config = 18
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Security settings for the cluster.
  SecurityConfig security_config = 16 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Lifecycle setting for the cluster.
  LifecycleConfig lifecycle_config = 17
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Port/endpoint configuration for this cluster.
  EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Metastore configuration.
  MetastoreConfig metastore_config = 20
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The config for Dataproc metrics.
  DataprocMetricConfig dataproc_metric_config = 23
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The node group settings.
  repeated AuxiliaryNodeGroup auxiliary_node_groups = 25
      [(google.api.field_behavior) = OPTIONAL];
}

// The Dataproc cluster config for a cluster that does not directly control the
// underlying compute resources, such as a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
message VirtualClusterConfig {
  // Optional. A Cloud Storage bucket used to stage job
  // dependencies, config files, and job driver console output.
  // If you do not specify a staging bucket, Dataproc will determine a
  // Cloud Storage location (US, ASIA, or EU) for your cluster's staging
  // bucket according to the Compute Engine zone where your cluster is
  // deployed, and then create and manage this project-level, per-location
  // bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string staging_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

  // The infrastructure on which the virtual cluster runs. Kubernetes is the
  // only alternative declared in this oneof.
  oneof infrastructure_config {
    // Required. The configuration for running the Dataproc cluster on
    // Kubernetes.
    KubernetesClusterConfig kubernetes_cluster_config = 6
        [(google.api.field_behavior) = REQUIRED];
  }

  // Optional. Configuration of auxiliary services used by this cluster.
  AuxiliaryServicesConfig auxiliary_services_config = 7
      [(google.api.field_behavior) = OPTIONAL];
}

// Auxiliary services configuration for a cluster.
message AuxiliaryServicesConfig {
  // Optional. The Hive Metastore configuration for this workload.
  MetastoreConfig metastore_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Spark History Server configuration for the workload.
  SparkHistoryServerConfig spark_history_server_config = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// Endpoint config for this cluster.
message EndpointConfig {
  // Output only. The map of port descriptions to URLs. Will only be populated
  // if `enable_http_port_access` is true.
  map<string, string> http_ports = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. If true, enable HTTP access to specific ports on the cluster
  // from external sources. Defaults to false.
  bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Autoscaling Policy config associated with the cluster.
message AutoscalingConfig {
  // Optional. The autoscaling policy used by the cluster.
  //
  // Only resource names that include the project ID and location (region) are
  // valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  // * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  //
  // Note that the policy must be in the same project and Dataproc region.
  string policy_uri = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Encryption settings for the cluster.
message EncryptionConfig {
  // Optional. The Cloud KMS key name to use for persistent disk (PD)
  // encryption for all instances in the cluster.
  string gce_pd_kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Common config settings for resources of Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // `PrivateIpv6GoogleAccess` controls whether and how Dataproc cluster nodes
  // can communicate with Google Services through gRPC over IPv6.
  // These values are directly mapped to corresponding values in the
  // [Compute Engine Instance
  // fields](https://cloud.google.com/compute/docs/reference/rest/v1/instances).
  enum PrivateIpv6GoogleAccess {
    // If unspecified, Compute Engine default behavior will apply, which
    // is the same as
    // [INHERIT_FROM_SUBNETWORK][google.cloud.dataproc.v1.GceClusterConfig.PrivateIpv6GoogleAccess.INHERIT_FROM_SUBNETWORK].
    PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED = 0;

    // Private access to and from Google Services configuration
    // inherited from the subnetwork configuration. This is the
    // default Compute Engine behavior.
    INHERIT_FROM_SUBNETWORK = 1;

    // Enables outbound private IPv6 access to Google Services from the Dataproc
    // cluster.
    OUTBOUND = 2;

    // Enables bidirectional private IPv6 access between Google Services and the
    // Dataproc cluster.
    BIDIRECTIONAL = 3;
  }

  // Optional. The Compute Engine zone where the Dataproc cluster will be
  // located. If omitted, the service will pick a zone in the cluster's Compute
  // Engine region. On a get request, zone will always be present.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
  // * `projects/[project_id]/zones/[zone]`
  // * `[zone]`
  string zone_uri = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine network to be used for machine
  // communications. Cannot be specified with `subnetwork_uri`. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for
  // more information).
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`
  // * `projects/[project_id]/global/networks/default`
  // * `default`
  string network_uri = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with `network_uri`.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `sub0`
  string subnetwork_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, all instances in the cluster will only have internal IP
  // addresses. By default, clusters are not restricted to internal IP
  // addresses, and will have ephemeral external IP addresses assigned to each
  // instance. This `internal_ip_only` restriction can only be enabled for
  // subnetwork enabled networks, and all off-cluster dependencies must be
  // configured to be accessible without external IP addresses.
  optional bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The type of IPv6 access for a cluster.
  PrivateIpv6GoogleAccess private_ipv6_google_access = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The [Dataproc service
  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
  // (also see [VM Data Plane
  // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
  // used by Dataproc cluster VM instances to access Google Cloud Platform
  // services.
  //
  // If not specified, the
  // [Compute Engine default service
  // account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
  // is used.
  string service_account = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URIs of service account scopes to be included in
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine tags to add to all instances (see [Tagging
  // instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
  repeated string tags = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine metadata entries to add to all instances (see
  // [Project and instance
  // metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  map<string, string> metadata = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Reservation Affinity for consuming Zonal reservation.
  ReservationAffinity reservation_affinity = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node Group Affinity for sole-tenant clusters.
  NodeGroupAffinity node_group_affinity = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Shielded Instance Config for clusters using [Compute Engine
  // Shielded
  // VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
  ShieldedInstanceConfig shielded_instance_config = 14
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Confidential Instance Config for clusters using [Confidential
  // VMs](https://cloud.google.com/compute/confidential-vm/docs).
  ConfidentialInstanceConfig confidential_instance_config = 15
      [(google.api.field_behavior) = OPTIONAL];
}

// Node Group Affinity for clusters using sole-tenant node groups.
// **The Dataproc `NodeGroupAffinity` resource is not related to the
// Dataproc [NodeGroup][google.cloud.dataproc.v1.NodeGroup] resource.**
message NodeGroupAffinity {
  // Required. The URI of a
  // sole-tenant [node group
  // resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups)
  // that the cluster will be created on.
  //
  // A full URL, partial URI, or node group name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `node-group-1`
  string node_group_uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Shielded Instance Config for clusters using [Compute Engine Shielded
// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
message ShieldedInstanceConfig {
  // Optional. Defines whether instances have Secure Boot enabled.
  optional bool enable_secure_boot = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Defines whether instances have the vTPM enabled.
  optional bool enable_vtpm = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Defines whether instances have integrity monitoring enabled.
  optional bool enable_integrity_monitoring = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Confidential Instance Config for clusters using [Confidential
// VMs](https://cloud.google.com/compute/confidential-vm/docs).
message ConfidentialInstanceConfig {
  // Optional. Defines whether the instance should have confidential compute
  // enabled.
  bool enable_confidential_compute = 1 [(google.api.field_behavior) = OPTIONAL];
}

560// The config settings for Compute Engine resources in
561// an instance group, such as a master or worker group.
562message InstanceGroupConfig {
563  // Controls the use of preemptible instances within the group.
564  enum Preemptibility {
565    // Preemptibility is unspecified, the system will choose the
566    // appropriate setting for each instance group.
567    PREEMPTIBILITY_UNSPECIFIED = 0;
568
569    // Instances are non-preemptible.
570    //
571    // This option is allowed for all instance groups and is the only valid
572    // value for Master and Worker instance groups.
573    NON_PREEMPTIBLE = 1;
574
575    // Instances are [preemptible]
576    // (https://cloud.google.com/compute/docs/instances/preemptible).
577    //
578    // This option is allowed only for [secondary worker]
579    // (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
580    // groups.
581    PREEMPTIBLE = 2;
582
583    // Instances are [Spot VMs]
584    // (https://cloud.google.com/compute/docs/instances/spot).
585    //
586    // This option is allowed only for [secondary worker]
587    // (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
588    // groups. Spot VMs are the latest version of [preemptible VMs]
589    // (https://cloud.google.com/compute/docs/instances/preemptible), and
590    // provide additional features.
591    SPOT = 3;
592  }
593
594  // Optional. The number of VM instances in the instance group.
595  // For [HA
596  // cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
597  // [master_config](#FIELDS.master_config) groups, **must be set to 3**.
598  // For standard cluster [master_config](#FIELDS.master_config) groups,
599  // **must be set to 1**.
600  int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];
601
602  // Output only. The list of instance names. Dataproc derives the names
603  // from `cluster_name`, `num_instances`, and the instance group.
604  repeated string instance_names = 2
605      [(google.api.field_behavior) = OUTPUT_ONLY];
606
607  // Output only. List of references to Compute Engine instances.
608  repeated InstanceReference instance_references = 11
609      [(google.api.field_behavior) = OUTPUT_ONLY];
610
611  // Optional. The Compute Engine image resource used for cluster instances.
612  //
613  // The URI can represent an image or image family.
614  //
615  // Image examples:
616  //
617  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/[image-id]`
618  // * `projects/[project_id]/global/images/[image-id]`
619  // * `image-id`
620  //
621  // Image family examples. Dataproc will use the most recent
622  // image from the family:
623  //
624  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/family/[custom-image-family-name]`
625  // * `projects/[project_id]/global/images/family/[custom-image-family-name]`
626  //
627  // If the URI is unspecified, it will be inferred from
628  // `SoftwareConfig.image_version` or the system default.
629  string image_uri = 3 [(google.api.field_behavior) = OPTIONAL];
630
631  // Optional. The Compute Engine machine type used for cluster instances.
632  //
633  // A full URL, partial URI, or short name are valid. Examples:
634  //
635  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
636  // * `projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
637  // * `n1-standard-2`
638  //
639  // **Auto Zone Exception**: If you are using the Dataproc
640  // [Auto Zone
641  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
642  // feature, you must use the short name of the machine type
643  // resource, for example, `n1-standard-2`.
644  string machine_type_uri = 4 [(google.api.field_behavior) = OPTIONAL];
645
646  // Optional. Disk option config settings.
647  DiskConfig disk_config = 5 [(google.api.field_behavior) = OPTIONAL];
648
649  // Output only. Specifies that this instance group contains preemptible
650  // instances.
651  bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];
652
653  // Optional. Specifies the preemptibility of the instance group.
654  //
655  // The default value for master and worker groups is
656  // `NON_PREEMPTIBLE`. This default cannot be changed.
657  //
658  // The default value for secondary instances is
659  // `PREEMPTIBLE`.
660  Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];
661
662  // Output only. The config for Compute Engine Instance Group
663  // Manager that manages this group.
664  // This is only used for preemptible instance groups.
665  ManagedGroupConfig managed_group_config = 7
666      [(google.api.field_behavior) = OUTPUT_ONLY];
667
668  // Optional. The Compute Engine accelerator configuration for these
669  // instances.
670  repeated AcceleratorConfig accelerators = 8
671      [(google.api.field_behavior) = OPTIONAL];
672
673  // Optional. Specifies the minimum cpu platform for the Instance Group.
674  // See [Dataproc -> Minimum CPU
675  // Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
676  string min_cpu_platform = 9 [(google.api.field_behavior) = OPTIONAL];
677
678  // Optional. The minimum number of primary worker instances to create.
679  // If `min_num_instances` is set, cluster creation will succeed if
680  // the number of primary workers created is at least equal to the
681  // `min_num_instances` number.
682  //
683  // Example: Cluster creation request with `num_instances` = `5` and
684  // `min_num_instances` = `3`:
685  //
686  // *  If 4 VMs are created and 1 instance fails,
687  //    the failed VM is deleted. The cluster is
688  //    resized to 4 instances and placed in a `RUNNING` state.
689  // *  If 2 instances are created and 3 instances fail,
690  //    the cluster is placed in an `ERROR` state. The failed VMs
691  //    are not deleted.
692  int32 min_num_instances = 12 [(google.api.field_behavior) = OPTIONAL];
693
694  // Optional. Instance flexibility Policy allowing a mixture of VM shapes and
695  // provisioning models.
696  InstanceFlexibilityPolicy instance_flexibility_policy = 13
697      [(google.api.field_behavior) = OPTIONAL];
698
699  // Optional. Configuration to handle the startup of instances during cluster
700  // create and update process.
701  StartupConfig startup_config = 14 [(google.api.field_behavior) = OPTIONAL];
702}
703
704// Configuration to handle the startup of instances during the cluster create
705// and update processes.
706message StartupConfig {
707  // Optional. The config setting that lets cluster creation or update succeed
708  // only after `required_registration_fraction` of instances are up and
709  // running. This configuration currently applies only to secondary workers.
710  // The cluster will fail if `required_registration_fraction` of instances
711  // are not available. This includes instance creation, agent registration,
712  // and service registration (if enabled).
713  optional double required_registration_fraction = 1
714      [(google.api.field_behavior) = OPTIONAL];
715}
716
717// A reference to a Compute Engine instance, identified by name and ID.
718message InstanceReference {
719  // The user-friendly name of the Compute Engine instance.
720  string instance_name = 1;
721
722  // The unique identifier of the Compute Engine instance.
723  string instance_id = 2;
724
725  // The public RSA key used for sharing data with this instance.
726  string public_key = 3;
727
728  // The public ECIES key used for sharing data with this instance.
729  string public_ecies_key = 4;
730}
731
732// Specifies the resources used to actively manage an instance group.
733message ManagedGroupConfig {
734  // Output only. The name of the Instance Template used for the Managed
735  // Instance Group.
736  string instance_template_name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
737
738  // Output only. The name of the Instance Group Manager for this group.
739  string instance_group_manager_name = 2
740      [(google.api.field_behavior) = OUTPUT_ONLY];
741
742  // Output only. The partial URI to the instance group manager for this group.
743  // Example: `projects/my-project/regions/us-central1/instanceGroupManagers/my-igm`.
744  string instance_group_manager_uri = 3
745      [(google.api.field_behavior) = OUTPUT_ONLY];
746}
747
748// Instance flexibility Policy allowing a mixture of VM shapes and provisioning
749// models.
750message InstanceFlexibilityPolicy {
751  // Defines machine types and a rank to which the machine types belong.
752  message InstanceSelection {
753    // Optional. Full machine-type names, e.g. "n1-standard-16".
754    repeated string machine_types = 1 [(google.api.field_behavior) = OPTIONAL];
755
756    // Optional. Preference of this instance selection. A lower number means
757    // higher preference. Dataproc will first try to create a VM based on the
758    // machine-type with priority rank and fall back to the next rank based on
759    // availability. Machine types and instance selections with the same
760    // priority have the same preference.
761    int32 rank = 2 [(google.api.field_behavior) = OPTIONAL];
762  }
763
764  // Defines a mapping from machine types to the number of VMs that are created
765  // with each machine type.
766  message InstanceSelectionResult {
767    // Output only. Full machine-type name, e.g. "n1-standard-16".
768    optional string machine_type = 1
769        [(google.api.field_behavior) = OUTPUT_ONLY];
770
771    // Output only. Number of VMs provisioned with the machine_type.
772    optional int32 vm_count = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
773  }
774
775  // Optional. List of instance selection options that the group will use when
776  // creating new VMs.
777  repeated InstanceSelection instance_selection_list = 2
778      [(google.api.field_behavior) = OPTIONAL];
779
780  // Output only. A list of instance selection results in the group.
781  repeated InstanceSelectionResult instance_selection_results = 3
782      [(google.api.field_behavior) = OUTPUT_ONLY];
783}
784
785// Specifies the type and number of accelerator cards attached to the instances
786// of an instance group. See [GPUs on Compute
787// Engine](https://cloud.google.com/compute/docs/gpus/).
788message AcceleratorConfig {
789  // Full URL, partial URI, or short name of the accelerator type resource to
790  // expose to this instance. See
791  // [Compute Engine
792  // AcceleratorTypes](https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
793  //
794  // Examples:
795  //
796  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
797  // * `projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
798  // * `nvidia-tesla-k80`
799  //
800  // **Auto Zone Exception**: If you are using the Dataproc
801  // [Auto Zone
802  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
803  // feature, you must use the short name of the accelerator type
804  // resource, for example, `nvidia-tesla-k80`.
805  string accelerator_type_uri = 1;
806
807  // The number of accelerator cards of this type exposed to this instance.
808  int32 accelerator_count = 2;
809}
810
811// Specifies the config of disk options for a group of VM instances.
812message DiskConfig {
813  // Optional. Type of the boot disk (default is "pd-standard").
814  // Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive),
815  // "pd-ssd" (Persistent Disk Solid State Drive),
816  // or "pd-standard" (Persistent Disk Hard Disk Drive).
817  // See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).
818  string boot_disk_type = 3 [(google.api.field_behavior) = OPTIONAL];
819
820  // Optional. Size in GB of the boot disk (default is 500 GB).
821  int32 boot_disk_size_gb = 1 [(google.api.field_behavior) = OPTIONAL];
822
823  // Optional. Number of attached local SSDs, from 0 to 8 (default is 0).
824  // If local SSDs are not attached, the boot disk is used to store runtime
825  // logs and [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html)
826  // data. If one or more local SSDs are attached, this runtime bulk
827  // data is spread across them, and the boot disk contains only basic
828  // config and installed binaries.
829  //
830  // Note: Local SSD options may vary by machine type and number of vCPUs
831  // selected.
832  int32 num_local_ssds = 2 [(google.api.field_behavior) = OPTIONAL];
833
834  // Optional. Interface type of local SSDs (default is "scsi").
835  // Valid values: "scsi" (Small Computer System Interface),
836  // "nvme" (Non-Volatile Memory Express).
837  // See [local SSD
838  // performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).
839  string local_ssd_interface = 4 [(google.api.field_behavior) = OPTIONAL];
840}
841
842// Node group identification and configuration information.
843message AuxiliaryNodeGroup {
844  // Required. Node group configuration.
845  NodeGroup node_group = 1 [(google.api.field_behavior) = REQUIRED];
846
847  // Optional. A node group ID. Generated if not specified.
848  //
849  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
850  // underscores (_), and hyphens (-). Cannot begin or end with underscore
851  // or hyphen. Must consist of 3 to 33 characters.
852  string node_group_id = 2 [(google.api.field_behavior) = OPTIONAL];
853}
854
855// Dataproc Node Group.
856// **The Dataproc `NodeGroup` resource is not related to the
857// Dataproc [NodeGroupAffinity][google.cloud.dataproc.v1.NodeGroupAffinity]
858// resource.**
859message NodeGroup {
860  option (google.api.resource) = {
861    type: "dataproc.googleapis.com/NodeGroup"
862    pattern: "projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{node_group}"
863  };
864
865  // Node group roles.
866  enum Role {
867    // Required unspecified role.
868    ROLE_UNSPECIFIED = 0;
869
870    // Job drivers run on the node group.
871    DRIVER = 1;
872  }
873
874  // The Node group [resource name](https://aip.dev/122).
875  string name = 1;
876
877  // Required. Node group roles.
878  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];
879
880  // Optional. The node group instance group configuration.
881  InstanceGroupConfig node_group_config = 3
882      [(google.api.field_behavior) = OPTIONAL];
883
884  // Optional. Node group labels.
885  //
886  // * Label **keys** must consist of from 1 to 63 characters and conform to
887  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
888  // * Label **values** can be empty. If specified, they must consist of from
889  //   1 to 63 characters and conform to
890  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
891  // * The node group must have no more than 32 labels.
892  map<string, string> labels = 4 [(google.api.field_behavior) = OPTIONAL];
893}
894
895// Specifies an executable to run on a fully configured node and a
896// timeout period for executable completion.
897message NodeInitializationAction {
898  // Required. Cloud Storage URI of executable file.
899  string executable_file = 1 [(google.api.field_behavior) = REQUIRED];
900
901  // Optional. Amount of time executable has to complete. Default is
902  // 10 minutes (see JSON representation of
903  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
904  //
905  // Cluster creation fails with an explanatory error message (the
906  // name of the executable that caused the error and the exceeded timeout
907  // period) if the executable is not completed at the end of the timeout period.
908  google.protobuf.Duration execution_timeout = 2
909      [(google.api.field_behavior) = OPTIONAL];
910}
911
912// The status of a cluster and its instances.
913message ClusterStatus {
914  // The cluster state.
915  enum State {
916    // The cluster state is unknown.
917    UNKNOWN = 0;
918
919    // The cluster is being created and set up. It is not ready for use.
920    CREATING = 1;
921
922    // The cluster is currently running and healthy. It is ready for use.
923    //
924    // **Note:** The cluster state changes from "creating" to "running" status
925    // after the master node(s), first two primary worker nodes (and the last
926    // primary worker node if primary workers > 2) are running.
927    RUNNING = 2;
928
929    // The cluster encountered an error. It is not ready for use.
930    ERROR = 3;
931
932    // The cluster has encountered an error while being updated. Jobs can
933    // be submitted to the cluster, but the cluster cannot be updated.
934    ERROR_DUE_TO_UPDATE = 9;
935
936    // The cluster is being deleted. It cannot be used.
937    DELETING = 4;
938
939    // The cluster is being updated. It continues to accept and process jobs.
940    UPDATING = 5;
941
942    // The cluster is being stopped. It cannot be used.
943    STOPPING = 6;
944
945    // The cluster is currently stopped. It is not ready for use.
946    STOPPED = 7;
947
948    // The cluster is being started. It is not ready for use.
949    STARTING = 8;
950
951    // The cluster is being repaired. It is not ready for use.
952    REPAIRING = 10;
953  }
954
955  // The cluster substate.
956  enum Substate {
957    // The cluster substate is unknown.
958    UNSPECIFIED = 0;
959
960    // The cluster is known to be in an unhealthy state
961    // (for example, critical daemons are not running or HDFS capacity is
962    // exhausted).
963    //
964    // Applies to RUNNING state.
965    UNHEALTHY = 1;
966
967    // The agent-reported status is out of date (may occur if
968    // Dataproc loses communication with the agent).
969    //
970    // Applies to RUNNING state.
971    STALE_STATUS = 2;
972  }
973
974  // Output only. The cluster's state.
975  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
976
977  // Optional. Output only. Details of the cluster's state.
978  string detail = 2 [
979    (google.api.field_behavior) = OUTPUT_ONLY,
980    (google.api.field_behavior) = OPTIONAL
981  ];
982
983  // Output only. Time when this state was entered (see JSON representation of
984  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
985  google.protobuf.Timestamp state_start_time = 3
986      [(google.api.field_behavior) = OUTPUT_ONLY];
987
988  // Output only. Additional state information that includes
989  // status reported by the agent.
990  Substate substate = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
991}
992
993// Security-related configuration, including encryption, Kerberos, etc.
994message SecurityConfig {
995  // Optional. Kerberos-related configuration.
996  KerberosConfig kerberos_config = 1 [(google.api.field_behavior) = OPTIONAL];
997
998  // Optional. Identity-related configuration, including service account based
999  // secure multi-tenancy user mappings.
1000  IdentityConfig identity_config = 2 [(google.api.field_behavior) = OPTIONAL];
1001}
1002
1003// Specifies Kerberos-related configuration.
1004message KerberosConfig {
1005  // Optional. Flag to indicate whether to Kerberize the cluster (default:
1006  // false). Set this field to true to enable Kerberos on a cluster.
1007  bool enable_kerberos = 1 [(google.api.field_behavior) = OPTIONAL];
1008
1009  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the root
1010  // principal password.
1011  string root_principal_password_uri = 2
1012      [(google.api.field_behavior) = OPTIONAL];
1013
1014  // Optional. The URI of the KMS key used to encrypt various sensitive
1015  // files.
1016  string kms_key_uri = 3 [(google.api.field_behavior) = OPTIONAL];
1017
1018  // Optional. The Cloud Storage URI of the keystore file used for SSL
1019  // encryption. If not provided, Dataproc will provide a self-signed
1020  // certificate.
1021  string keystore_uri = 4 [(google.api.field_behavior) = OPTIONAL];
1022
1023  // Optional. The Cloud Storage URI of the truststore file used for SSL
1024  // encryption. If not provided, Dataproc will provide a self-signed
1025  // certificate.
1026  string truststore_uri = 5 [(google.api.field_behavior) = OPTIONAL];
1027
1028  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
1029  // password to the user-provided keystore. For the self-signed certificate,
1030  // this password is generated by Dataproc.
1031  string keystore_password_uri = 6 [(google.api.field_behavior) = OPTIONAL];
1032
1033  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
1034  // password to the user-provided key. For the self-signed certificate, this
1035  // password is generated by Dataproc.
1036  string key_password_uri = 7 [(google.api.field_behavior) = OPTIONAL];
1037
1038  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
1039  // password to the user-provided truststore. For the self-signed certificate,
1040  // this password is generated by Dataproc.
1041  string truststore_password_uri = 8 [(google.api.field_behavior) = OPTIONAL];
1042
1043  // Optional. The remote realm the Dataproc on-cluster KDC will trust, should
1044  // the user enable cross-realm trust.
1045  string cross_realm_trust_realm = 9 [(google.api.field_behavior) = OPTIONAL];
1046
1047  // Optional. The KDC (IP or hostname) for the remote trusted realm in a
1048  // cross-realm trust relationship.
1049  string cross_realm_trust_kdc = 10 [(google.api.field_behavior) = OPTIONAL];
1050
1051  // Optional. The admin server (IP or hostname) for the remote trusted realm in
1052  // a cross-realm trust relationship.
1053  string cross_realm_trust_admin_server = 11
1054      [(google.api.field_behavior) = OPTIONAL];
1055
1056  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
1057  // shared password between the on-cluster Kerberos realm and the remote
1058  // trusted realm, in a cross-realm trust relationship.
1059  string cross_realm_trust_shared_password_uri = 12
1060      [(google.api.field_behavior) = OPTIONAL];
1061
1062  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
1063  // master key of the KDC database.
1064  string kdc_db_key_uri = 13 [(google.api.field_behavior) = OPTIONAL];
1065
1066  // Optional. The lifetime of the ticket granting ticket, in hours.
1067  // If not specified, or user specifies 0, then default value 10
1068  // will be used.
1069  int32 tgt_lifetime_hours = 14 [(google.api.field_behavior) = OPTIONAL];
1070
1071  // Optional. The name of the on-cluster Kerberos realm.
1072  // If not specified, the uppercased domain of hostnames will be the realm.
1073  string realm = 15 [(google.api.field_behavior) = OPTIONAL];
1074}
1075
1076// Identity-related configuration, including service account based
1077// secure multi-tenancy user mappings.
1078message IdentityConfig {
1079  // Required. Map of user to service account.
1080  map<string, string> user_service_account_mapping = 1
1081      [(google.api.field_behavior) = REQUIRED];
1082}
1083
1084// Specifies the selection and configuration of software inside the cluster.
1085message SoftwareConfig {
1086  // Optional. The version of software inside the cluster. It must be one of the
1087  // supported [Dataproc
1088  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
1089  // such as "1.2" (including a subminor version, such as "1.2.29"), or the
1090  // ["preview"
1091  // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
1092  // If unspecified, it defaults to the latest Debian version.
1093  string image_version = 1 [(google.api.field_behavior) = OPTIONAL];
1094
1095  // Optional. The properties to set on daemon config files.
1096  //
1097  // Property keys are specified in `prefix:property` format, for example
1098  // `core:hadoop.tmp.dir`. The following are the supported prefixes
1099  // and their mappings:
1100  //
1101  // * capacity-scheduler: `capacity-scheduler.xml`
1102  // * core:   `core-site.xml`
1103  // * distcp: `distcp-default.xml`
1104  // * hdfs:   `hdfs-site.xml`
1105  // * hive:   `hive-site.xml`
1106  // * mapred: `mapred-site.xml`
1107  // * pig:    `pig.properties`
1108  // * spark:  `spark-defaults.conf`
1109  // * yarn:   `yarn-site.xml`
1110  //
1111  // For more information, see [Cluster
1112  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
1113  map<string, string> properties = 2 [(google.api.field_behavior) = OPTIONAL];
1114
1115  // Optional. The set of components to activate on the cluster.
1116  repeated Component optional_components = 3
1117      [(google.api.field_behavior) = OPTIONAL];
1118}
1119
1120// Specifies the cluster auto-delete schedule configuration.
1121message LifecycleConfig {
1122  // Optional. The duration to keep the cluster alive while idling (when no jobs
1123  // are running). Passing this threshold will cause the cluster to be
1124  // deleted. Minimum value is 5 minutes; maximum value is 14 days (see JSON
1125  // representation of
1126  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1127  google.protobuf.Duration idle_delete_ttl = 1
1128      [(google.api.field_behavior) = OPTIONAL];
1129
1130  // Either the exact time the cluster should be deleted at or
1131  // the cluster maximum age.
1132  oneof ttl {
1133    // Optional. The time when the cluster will be auto-deleted (see JSON
1134    // representation of
1135    // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1136    google.protobuf.Timestamp auto_delete_time = 2
1137        [(google.api.field_behavior) = OPTIONAL];
1138
1139    // Optional. The lifetime duration of the cluster. The cluster will be
1140    // auto-deleted at the end of this period. Minimum value is 10 minutes;
1141    // maximum value is 14 days (see JSON representation of
1142    // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1143    google.protobuf.Duration auto_delete_ttl = 3
1144        [(google.api.field_behavior) = OPTIONAL];
1145  }
1146
1147  // Output only. The time when the cluster became idle (most recent job
1148  // finished) and became eligible for deletion due to idleness (see JSON
1149  // representation of
1150  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1151  google.protobuf.Timestamp idle_start_time = 4
1152      [(google.api.field_behavior) = OUTPUT_ONLY];
1153}
1154
1155// Specifies a Dataproc Metastore configuration.
1156message MetastoreConfig {
1157  // Required. Resource name of an existing Dataproc Metastore service.
1158  //
1159  // Example:
1160  //
1161  // * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`
1162  string dataproc_metastore_service = 1 [
1163    (google.api.field_behavior) = REQUIRED,
1164    (google.api.resource_reference) = {
1165      type: "metastore.googleapis.com/Service"
1166    }
1167  ];
1168}
1169
1170// Contains cluster daemon metrics, such as HDFS and YARN stats.
1171//
1172// **Beta Feature**: This report is available for testing purposes only. It may
1173// be changed before final release.
1174message ClusterMetrics {
1175  // The HDFS metrics.
1176  map<string, int64> hdfs_metrics = 1;
1177
1178  // The YARN metrics.
1179  map<string, int64> yarn_metrics = 2;
1180}
1181
1182// Dataproc metric config.
1183message DataprocMetricConfig {
1184  // A source for the collection of Dataproc custom metrics (see
1185  // [Custom
1186  // metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)).
1187  enum MetricSource {
1188    // Required unspecified metric source.
1189    METRIC_SOURCE_UNSPECIFIED = 0;
1190
1191    // Monitoring agent metrics. If this source is enabled,
1192    // Dataproc enables the monitoring agent in Compute Engine,
1193    // and collects monitoring agent metrics, which are published
1194    // with an `agent.googleapis.com` prefix.
1195    MONITORING_AGENT_DEFAULTS = 1;
1196
1197    // HDFS metric source.
1198    HDFS = 2;
1199
1200    // Spark metric source.
1201    SPARK = 3;
1202
1203    // YARN metric source.
1204    YARN = 4;
1205
1206    // Spark History Server metric source.
1207    SPARK_HISTORY_SERVER = 5;
1208
1209    // Hiveserver2 metric source.
1210    HIVESERVER2 = 6;
1211
1212    // Hivemetastore metric source.
1213    HIVEMETASTORE = 7;
1214  }
1215
1216  // A Dataproc custom metric.
1217  message Metric {
1218    // Required. A standard set of metrics is collected unless `metricOverrides`
1219    // are specified for the metric source (see [Custom
1220    // metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
1221    // for more information).
1222    MetricSource metric_source = 1 [(google.api.field_behavior) = REQUIRED];
1223
1224    // Optional. Specify one or more [Custom
1225    // metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
1226    // to collect for the metric source (for the `SPARK` metric source, any
1227    // [Spark
1228    // metric](https://spark.apache.org/docs/latest/monitoring.html#metrics) can
1229    // be specified).
1230    //
1231    // Provide metrics in the following format:
1232    // <code><var>METRIC_SOURCE</var>:<var>INSTANCE</var>:<var>GROUP</var>:<var>METRIC</var></code>
1233    // Use camelcase as appropriate.
1234    //
1235    // Examples:
1236    //
1237    // ```
1238    // yarn:ResourceManager:QueueMetrics:AppsCompleted
1239    // spark:driver:DAGScheduler:job.allJobs
1240    // sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
1241    // hiveserver2:JVM:Memory:NonHeapMemoryUsage.used
1242    // ```
1243    //
1244    // Notes:
1245    //
1246    // * Only the specified overridden metrics are collected for the
1247    //   metric source. For example, if one or more `spark:executive` metrics
1248    //   are listed as metric overrides, other `SPARK` metrics are not
1249    //   collected. The collection of the metrics for other enabled custom
1250    //   metric sources is unaffected. For example, if both `SPARK` and `YARN`
1251    //   metric sources are enabled, and overrides are provided for Spark
1252    //   metrics only, all YARN metrics are collected.
1253    repeated string metric_overrides = 2
1254        [(google.api.field_behavior) = OPTIONAL];
1255  }
1256
1257  // Required. Metrics sources to enable.
1258  repeated Metric metrics = 1 [(google.api.field_behavior) = REQUIRED];
1259}
1260
1261// A request to create a cluster.
1262message CreateClusterRequest {
1263  // Required. The ID of the Google Cloud Platform project that the cluster
1264  // belongs to.
1265  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1266
1267  // Required. The Dataproc region in which to handle the request.
1268  string region = 3 [(google.api.field_behavior) = REQUIRED];
1269
1270  // Required. The cluster to create.
1271  Cluster cluster = 2 [(google.api.field_behavior) = REQUIRED];
1272
1273  // Optional. A unique ID used to identify the request. If the server receives
1274  // two
1275  // [CreateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateClusterRequest)s
1276  // with the same ID, then the second request will be ignored and the
1277  // first [google.longrunning.Operation][google.longrunning.Operation] created
1278  // and stored in the backend is returned.
1279  //
1280  // It is recommended to always set this value to a
1281  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
1282  //
1283  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
1284  // underscores (_), and hyphens (-). The maximum length is 40 characters.
1285  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
1286
1287  // Optional. Failure action when primary worker creation fails.
1288  FailureAction action_on_failed_primary_workers = 5
1289      [(google.api.field_behavior) = OPTIONAL];
1290}
1291
1292// A request to update a cluster.
1293message UpdateClusterRequest {
1294  // Required. The ID of the Google Cloud Platform project the
1295  // cluster belongs to.
1296  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1297
1298  // Required. The Dataproc region in which to handle the request.
1299  string region = 5 [(google.api.field_behavior) = REQUIRED];
1300
1301  // Required. The cluster name.
1302  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
1303
1304  // Required. The changes to the cluster.
1305  Cluster cluster = 3 [(google.api.field_behavior) = REQUIRED];
1306
1307  // Optional. Timeout for graceful YARN decommissioning. Graceful
1308  // decommissioning allows removing nodes from the cluster without
1309  // interrupting jobs in progress. Timeout specifies how long to wait for jobs
1310  // in progress to finish before forcefully removing nodes (and potentially
1311  // interrupting jobs). Default timeout is 0 (for forceful decommission), and
1312  // the maximum allowed timeout is 1 day (see JSON representation of
1313  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
1314  //
1315  // Only supported on Dataproc image versions 1.2 and higher.
1316  google.protobuf.Duration graceful_decommission_timeout = 6
1317      [(google.api.field_behavior) = OPTIONAL];
1318
1319  // Required. Specifies the path, relative to `Cluster`, of
1320  // the field to update. For example, to change the number of workers
1321  // in a cluster to 5, the `update_mask` parameter would be
1322  // specified as `config.worker_config.num_instances`,
1323  // and the `PATCH` request body would specify the new value, as follows:
1324  //
1325  //     {
1326  //       "config":{
1327  //         "workerConfig":{
1328  //           "numInstances":"5"
1329  //         }
1330  //       }
1331  //     }
1332  // Similarly, to change the number of preemptible workers in a cluster to 5,
1333  // the `update_mask` parameter would be
1334  // `config.secondary_worker_config.num_instances`, and the `PATCH` request
1335  // body would be set as follows:
1336  //
1337  //     {
1338  //       "config":{
1339  //         "secondaryWorkerConfig":{
1340  //           "numInstances":"5"
1341  //         }
1342  //       }
1343  //     }
1344  // <strong>Note:</strong> Currently, only the following fields can be updated:
1345  //
1346  //  <table>
1347  //  <tbody>
1348  //  <tr>
1349  //  <td><strong>Mask</strong></td>
1350  //  <td><strong>Purpose</strong></td>
1351  //  </tr>
1352  //  <tr>
1353  //  <td><strong><em>labels</em></strong></td>
1354  //  <td>Update labels</td>
1355  //  </tr>
1356  //  <tr>
1357  //  <td><strong><em>config.worker_config.num_instances</em></strong></td>
1358  //  <td>Resize primary worker group</td>
1359  //  </tr>
1360  //  <tr>
1361  //  <td><strong><em>config.secondary_worker_config.num_instances</em></strong></td>
1362  //  <td>Resize secondary worker group</td>
1363  //  </tr>
1364  //  <tr>
1365  //  <td><strong><em>config.autoscaling_config.policy_uri</em></strong></td>
1366  //  <td>Use, stop using, or change autoscaling policies</td>
1367  //  </tr>
1368  //  </tbody>
1369  //  </table>
1370  google.protobuf.FieldMask update_mask = 4
1371      [(google.api.field_behavior) = REQUIRED];
1372
1373  // Optional. A unique ID used to identify the request. If the server
1374  // receives two
1375  // [UpdateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.UpdateClusterRequest)s
1376  // with the same ID, then the second request will be ignored and the
1377  // first [google.longrunning.Operation][google.longrunning.Operation] created
1378  // and stored in the backend is returned.
1379  //
1380  // It is recommended to always set this value to a
1381  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
1382  //
1383  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
1384  // underscores (_), and hyphens (-). The maximum length is 40 characters.
1385  string request_id = 7 [(google.api.field_behavior) = OPTIONAL];
1386}
1387
1388// A request to stop a cluster.
1389message StopClusterRequest {
1390  // Required. The ID of the Google Cloud Platform project the
1391  // cluster belongs to.
1392  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1393
1394  // Required. The Dataproc region in which to handle the request.
1395  string region = 2 [(google.api.field_behavior) = REQUIRED];
1396
1397  // Required. The cluster name.
1398  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];
1399
1400  // Optional. Specifying the `cluster_uuid` means the RPC will fail
1401  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
1402  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];
1403
1404  // Optional. A unique ID used to identify the request. If the server
1405  // receives two
1406  // [StopClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StopClusterRequest)s
1407  // with the same ID, then the second request will be ignored and the
1408  // first [google.longrunning.Operation][google.longrunning.Operation] created
1409  // and stored in the backend is returned.
1410  //
1411  // Recommendation: Set this value to a
1412  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
1413  //
1414  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
1415  // underscores (_), and hyphens (-). The maximum length is 40 characters.
1416  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
1417}
1418
1419// A request to start a cluster.
1420message StartClusterRequest {
1421  // Required. The ID of the Google Cloud Platform project that the
1422  // cluster belongs to.
1423  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1424
1425  // Required. The Dataproc region in which to handle the request.
1426  string region = 2 [(google.api.field_behavior) = REQUIRED];
1427
1428  // Required. The name of the cluster to start.
1429  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];
1430
1431  // Optional. Specifying the `cluster_uuid` means the RPC will fail
1432  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
1433  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];
1434
1435  // Optional. A unique ID used to identify the request. If the server
1436  // receives two
1437  // [StartClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StartClusterRequest)s
1438  // with the same `request_id`, then the second request is ignored and the
1439  // first [google.longrunning.Operation][google.longrunning.Operation] created
1440  // and stored in the backend is returned.
1441  //
1442  // Recommendation: Set this value to a
1443  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
1444  //
1445  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
1446  // underscores (_), and hyphens (-). The maximum length is 40 characters.
1447  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
1448}
1449
1450// A request to delete a cluster.
1451message DeleteClusterRequest {
1452  // Required. The ID of the Google Cloud Platform project that the cluster
1453  // belongs to.
1454  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1455
1456  // Required. The Dataproc region in which to handle the request.
1457  string region = 3 [(google.api.field_behavior) = REQUIRED];
1458
1459  // Required. The name of the cluster to delete.
1460  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
1461
1462  // Optional. Specifying the `cluster_uuid` means the RPC should fail
1463  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
1464  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];
1465
1466  // Optional. A unique ID used to identify the request. If the server
1467  // receives two
1468  // [DeleteClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.DeleteClusterRequest)s
1469  // with the same `request_id`, then the second request is ignored and the
1470  // first [google.longrunning.Operation][google.longrunning.Operation] created
1471  // and stored in the backend is returned.
1472  //
1473  // It is recommended to always set this value to a
1474  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
1475  //
1476  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
1477  // underscores (_), and hyphens (-). The maximum length is 40 characters.
1478  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
1479}
1480
1481// A request to get the resource representation for a cluster in a project.
1482message GetClusterRequest {
1483  // Required. The ID of the Google Cloud Platform project that the cluster
1484  // belongs to.
1485  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1486
1487  // Required. The Dataproc region in which to handle the request.
1488  string region = 3 [(google.api.field_behavior) = REQUIRED];
1489
1490  // Required. The name of the cluster to retrieve.
1491  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
1492}
1493
1494// A request to list the clusters in a project.
1495message ListClustersRequest {
1496  // Required. The ID of the Google Cloud Platform project whose clusters
1497  // are listed.
1498  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1499
1500  // Required. The Dataproc region in which to handle the request.
1501  string region = 4 [(google.api.field_behavior) = REQUIRED];
1502
1503  // Optional. A filter constraining the clusters to list. Filters are
1504  // case-sensitive and have the following syntax:
1505  //
1506  // field = value [AND [field = value]] ...
1507  //
1508  // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
1509  // and `[KEY]` is a label key. **value** can be `*` to match all values.
1510  // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
1511  // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
1512  // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
1513  // contains the `DELETING` and `ERROR` states.
1514  // `clusterName` is the name of the cluster provided at creation time.
1515  // Only the logical `AND` operator is supported; space-separated items are
1516  // treated as having an implicit `AND` operator.
1517  //
1518  // Example filter:
1519  //
1520  // status.state = ACTIVE AND clusterName = mycluster
1521  // AND labels.env = staging AND labels.starred = *
1522  string filter = 5 [(google.api.field_behavior) = OPTIONAL];
1523
1524  // Optional. The standard List page size.
1525  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];
1526
1527  // Optional. The standard List page token.
1528  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
1529}
1530
1531// A list of clusters in a project; may be one page of a larger result set.
1532message ListClustersResponse {
1533  // Output only. The clusters in the project.
1534  repeated Cluster clusters = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1535
1536  // Output only. This token is included in the response if there are more
1537  // results to fetch. To fetch additional results, provide this value as the
1538  // `page_token` in a subsequent `ListClustersRequest`.
1539  string next_page_token = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
1540}
1541
1542// A request to collect cluster diagnostic information.
1543message DiagnoseClusterRequest {
1544  // Required. The ID of the Google Cloud Platform project that the cluster
1545  // belongs to.
1546  string project_id = 1 [(google.api.field_behavior) = REQUIRED];
1547
1548  // Required. The Dataproc region in which to handle the request.
1549  string region = 3 [(google.api.field_behavior) = REQUIRED];
1550
1551  // Required. The name of the cluster to diagnose.
1552  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
1553
1554  // Optional. The output Cloud Storage directory for the diagnostic
1555  // tarball. If not specified, a task-specific directory in the cluster's
1556  // staging bucket will be used.
1557  string tarball_gcs_dir = 4 [(google.api.field_behavior) = OPTIONAL];
1558
1559  // Optional. Time interval in which diagnosis should be carried out on the
1560  // cluster.
1561  google.type.Interval diagnosis_interval = 6
1562      [(google.api.field_behavior) = OPTIONAL];
1563
1564  // Optional. Specifies a list of jobs on which diagnosis is to be performed.
1565  // Format: `projects/{project}/regions/{region}/jobs/{job}`
1566  repeated string jobs = 10 [(google.api.field_behavior) = OPTIONAL];
1567
1568  // Optional. Specifies a list of YARN applications on which diagnosis is to be
1569  // performed.
1570  repeated string yarn_application_ids = 11
1571      [(google.api.field_behavior) = OPTIONAL];
1572}
1573
1574// The location of the diagnostic output collected for a cluster.
1575message DiagnoseClusterResults {
1576  // Output only. The Cloud Storage URI of the diagnostic output.
1577  // The output report is a plain text file with a summary of collected
1578  // diagnostics.
1579  string output_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
1580}
1581
1582// Reservation affinity for consuming a zonal reservation.
1583message ReservationAffinity {
1584  // Indicates whether to consume capacity from a reservation or not.
1585  enum Type {
1586    TYPE_UNSPECIFIED = 0;  // Default value; no reservation type specified.
1587
1588    // Do not consume from any allocated capacity.
1589    NO_RESERVATION = 1;
1590
1591    // Consume any reservation available.
1592    ANY_RESERVATION = 2;
1593
1594    // Must consume from a specific reservation. The `key` and `values` fields
1595    // must be set to identify the reservations.
1596    SPECIFIC_RESERVATION = 3;
1597  }
1598
1599  // Optional. Type of reservation to consume.
1600  Type consume_reservation_type = 1 [(google.api.field_behavior) = OPTIONAL];
1601
1602  // Optional. Corresponds to the label key of reservation resource.
1603  string key = 2 [(google.api.field_behavior) = OPTIONAL];
1604
1605  // Optional. Corresponds to the label values of reservation resource.
1606  repeated string values = 3 [(google.api.field_behavior) = OPTIONAL];
1607}
1608