1// Copyright 2022 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.dataproc.v1;
18
19import "google/api/annotations.proto";
20import "google/api/client.proto";
21import "google/api/field_behavior.proto";
22import "google/api/resource.proto";
23import "google/cloud/dataproc/v1/shared.proto";
24import "google/longrunning/operations.proto";
25import "google/protobuf/duration.proto";
26import "google/protobuf/field_mask.proto";
27import "google/protobuf/timestamp.proto";
28
29option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
30option java_multiple_files = true;
31option java_outer_classname = "ClustersProto";
32option java_package = "com.google.cloud.dataproc.v1";
33
// The ClusterController service provides methods to manage clusters
// of Compute Engine instances.
service ClusterController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a cluster in a project. The
  // [Operation.metadata][google.longrunning.Operation.metadata] of the
  // returned operation will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc CreateCluster(CreateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
    option (google.api.method_signature) = "project_id,region,cluster";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Updates a cluster in a project. The
  // [Operation.metadata][google.longrunning.Operation.metadata] of the
  // returned operation will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // An error is returned unless the cluster is in a
  // [`RUNNING`][google.cloud.dataproc.v1.ClusterStatus.State] state.
  rpc UpdateCluster(UpdateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
    option (google.api.method_signature) =
        "project_id,region,cluster_name,cluster,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Stops a cluster in a project. The returned long-running operation
  // declares `Cluster` as its response type and
  // `ClusterOperationMetadata` as its metadata type.
  rpc StopCluster(StopClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:stop"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Starts a cluster in a project. The returned long-running operation
  // declares `Cluster` as its response type and
  // `ClusterOperationMetadata` as its metadata type.
  rpc StartCluster(StartClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:start"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Deletes a cluster in a project. The
  // [Operation.metadata][google.longrunning.Operation.metadata] of the
  // returned operation will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc DeleteCluster(DeleteClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
  }

  // Lists all `regions/{region}/clusters` in a project, in alphabetical
  // order.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters"
    };
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Gets cluster diagnostic information. The
  // [Operation.metadata][google.longrunning.Operation.metadata] of the
  // returned operation will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // Once the operation has completed,
  // [Operation.response][google.longrunning.Operation.response]
  // contains
  // [DiagnoseClusterResults](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#diagnoseclusterresults).
  rpc DiagnoseCluster(DiagnoseClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "DiagnoseClusterResults"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }
}
153
// Describes the identifying information, config, and status of
// a Dataproc cluster.
message Cluster {
  // Required. The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name, which must be unique within a project.
  // The name must begin with a lowercase letter and may contain up to 51
  // lowercase letters, numbers, and hyphens. It must not end with a hyphen.
  // The name of a deleted cluster can be reused.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The cluster config for a cluster of Compute Engine Instances.
  // Note that Dataproc may set default values, and values may change
  // when clusters are updated.
  //
  // Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  ClusterConfig config = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The virtual cluster config is used when creating a Dataproc
  // cluster that does not directly control the underlying compute resources,
  // for example, when creating a [Dataproc-on-GKE
  // cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
  // Dataproc may set default values, and values may change when
  // clusters are updated.
  //
  // Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  VirtualClusterConfig virtual_cluster_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels to associate with this cluster.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a cluster.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Cluster status.
  ClusterStatus status = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous cluster status.
  repeated ClusterStatus status_history = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A cluster UUID (Universally Unique Identifier). Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Contains cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It
  // may be changed before final release.
  ClusterMetrics metrics = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}
211
// The cluster config.
message ClusterConfig {
  // Optional. A Cloud Storage bucket used to stage job
  // dependencies, config files, and job driver console output.
  // If you do not specify a staging bucket, Cloud
  // Dataproc will determine a Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the
  // Compute Engine zone where your cluster is deployed, and then create
  // and manage this project-level, per-location bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs
  // data, such as Spark and MapReduce history files. If you do not specify a
  // temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or
  // EU) for your cluster's temp bucket according to the Compute Engine zone
  // where your cluster is deployed, and then create and manage this
  // project-level, per-location bucket. The default bucket has a TTL of 90
  // days, but you can use any TTL (or none) if you specify a bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings shared by
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // the cluster's master instance.
  InstanceGroupConfig master_config = 9
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // the cluster's worker instances.
  InstanceGroupConfig worker_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // a cluster's secondary worker instances.
  InstanceGroupConfig secondary_worker_config = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The config settings for cluster software.
  SoftwareConfig software_config = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker nodes.
  // You can test a node's `role` metadata to run an executable on
  // a master or worker node, as shown below using `curl` (you can also use
  // `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google
  //     http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Encryption settings for the cluster.
  EncryptionConfig encryption_config = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Autoscaling config for the policy associated with the cluster.
  // The cluster does not autoscale if this field is unset.
  AutoscalingConfig autoscaling_config = 18
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Security settings for the cluster.
  SecurityConfig security_config = 16 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Lifecycle setting for the cluster.
  LifecycleConfig lifecycle_config = 17
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Port/endpoint configuration for this cluster.
  EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Metastore configuration.
  MetastoreConfig metastore_config = 20
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The config for Dataproc metrics.
  DataprocMetricConfig dataproc_metric_config = 23
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The node group settings.
  repeated AuxiliaryNodeGroup auxiliary_node_groups = 25
      [(google.api.field_behavior) = OPTIONAL];
}
310
// The Dataproc cluster config for a cluster that does not directly control the
// underlying compute resources, such as a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
message VirtualClusterConfig {
  // Optional. A Cloud Storage bucket used to stage job
  // dependencies, config files, and job driver console output.
  // If you do not specify a staging bucket, Cloud
  // Dataproc will determine a Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the
  // Compute Engine zone where your cluster is deployed, and then create
  // and manage this project-level, per-location bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string staging_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

  // Infrastructure configuration for the virtual cluster.
  // `kubernetes_cluster_config` is the only member declared in this oneof.
  oneof infrastructure_config {
    // Required. The configuration for running the Dataproc cluster on
    // Kubernetes.
    KubernetesClusterConfig kubernetes_cluster_config = 6
        [(google.api.field_behavior) = REQUIRED];
  }

  // Optional. Configuration of auxiliary services used by this cluster.
  AuxiliaryServicesConfig auxiliary_services_config = 7
      [(google.api.field_behavior) = OPTIONAL];
}
339
// Configuration of the auxiliary services for a Cluster.
message AuxiliaryServicesConfig {
  // Optional. The Hive Metastore configuration for this workload.
  MetastoreConfig metastore_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Spark History Server configuration for the workload.
  SparkHistoryServerConfig spark_history_server_config = 2
      [(google.api.field_behavior) = OPTIONAL];
}
349
// Endpoint config for this cluster.
message EndpointConfig {
  // Output only. The map of port descriptions to URLs. Populated only
  // if `enable_http_port_access` is true.
  map<string, string> http_ports = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. If true, enable HTTP access to specific ports on the cluster
  // from external sources. Defaults to false.
  bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
}
361
// Autoscaling Policy config associated with the cluster.
message AutoscalingConfig {
  // Optional. The autoscaling policy used by the cluster.
  //
  // Only resource names that include the project ID and location (region)
  // are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  // * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  //
  // Note that the policy must be in the same project and Dataproc region.
  string policy_uri = 1 [(google.api.field_behavior) = OPTIONAL];
}
375
// Encryption settings for the cluster.
message EncryptionConfig {
  // Optional. The name of the Cloud KMS key to use for PD disk encryption
  // for all instances in the cluster.
  string gce_pd_kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL];
}
382
// Common config settings for resources of Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // `PrivateIpv6GoogleAccess` controls whether and how Dataproc cluster nodes
  // can communicate with Google Services through gRPC over IPv6.
  // These values are directly mapped to corresponding values in the
  // [Compute Engine Instance
  // fields](https://cloud.google.com/compute/docs/reference/rest/v1/instances).
  enum PrivateIpv6GoogleAccess {
    // If unspecified, Compute Engine default behavior will apply, which
    // is the same as
    // [INHERIT_FROM_SUBNETWORK][google.cloud.dataproc.v1.GceClusterConfig.PrivateIpv6GoogleAccess.INHERIT_FROM_SUBNETWORK].
    PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED = 0;

    // Private access to and from Google Services configuration
    // inherited from the subnetwork configuration. This is the
    // default Compute Engine behavior.
    INHERIT_FROM_SUBNETWORK = 1;

    // Enables outbound private IPv6 access to Google Services from the Dataproc
    // cluster.
    OUTBOUND = 2;

    // Enables bidirectional private IPv6 access between Google Services and the
    // Dataproc cluster.
    BIDIRECTIONAL = 3;
  }

  // Optional. The Compute Engine zone where the Dataproc cluster will be
  // located. If omitted, the service will pick a zone in the cluster's Compute
  // Engine region. On a get request, zone will always be present.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
  // * `projects/[project_id]/zones/[zone]`
  // * `[zone]`
  string zone_uri = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine network to be used for machine
  // communications. Cannot be specified with `subnetwork_uri`. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for
  // more information).
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`
  // * `projects/[project_id]/global/networks/default`
  // * `default`
  string network_uri = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with `network_uri`.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `sub0`
  string subnetwork_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, all instances in the cluster will only have internal IP
  // addresses. By default, clusters are not restricted to internal IP
  // addresses, and will have ephemeral external IP addresses assigned to each
  // instance. This `internal_ip_only` restriction can only be enabled for
  // subnetwork enabled networks, and all off-cluster dependencies must be
  // configured to be accessible without external IP addresses.
  optional bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The type of IPv6 access for a cluster.
  PrivateIpv6GoogleAccess private_ipv6_google_access = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The [Dataproc service
  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
  // (also see [VM Data Plane
  // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
  // used by Dataproc cluster VM instances to access Google Cloud Platform
  // services.
  //
  // If not specified, the
  // [Compute Engine default service
  // account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
  // is used.
  string service_account = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URIs of service account scopes to be included in
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine tags to add to all instances (see [Tagging
  // instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
  //
  // Annotated OPTIONAL for consistency with the other fields of this
  // message; the annotation does not affect the wire format.
  repeated string tags = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine metadata entries to add to all instances (see
  // [Project and instance
  // metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  //
  // Annotated OPTIONAL for consistency with the other fields of this
  // message; the annotation does not affect the wire format.
  map<string, string> metadata = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Reservation Affinity for consuming Zonal reservation.
  ReservationAffinity reservation_affinity = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node Group Affinity for sole-tenant clusters.
  NodeGroupAffinity node_group_affinity = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Shielded Instance Config for clusters using [Compute Engine
  // Shielded
  // VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
  ShieldedInstanceConfig shielded_instance_config = 14
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Confidential Instance Config for clusters using [Confidential
  // VMs](https://cloud.google.com/compute/confidential-vm/docs).
  ConfidentialInstanceConfig confidential_instance_config = 15
      [(google.api.field_behavior) = OPTIONAL];
}
516
// Node Group Affinity for clusters using sole-tenant node groups.
// **The Dataproc `NodeGroupAffinity` resource is not related to the
// Dataproc [NodeGroup][google.cloud.dataproc.v1.NodeGroup] resource.**
message NodeGroupAffinity {
  // Required. The URI of the sole-tenant [node group
  // resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups)
  // on which the cluster will be created.
  //
  // A full URL, partial URI, or node group name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `node-group-1`
  string node_group_uri = 1 [(google.api.field_behavior) = REQUIRED];
}
533
// Shielded Instance Config for clusters using [Compute Engine Shielded
// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
message ShieldedInstanceConfig {
  // Optional. Whether instances have Secure Boot enabled.
  optional bool enable_secure_boot = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Whether instances have the vTPM enabled.
  optional bool enable_vtpm = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Whether instances have integrity monitoring enabled.
  optional bool enable_integrity_monitoring = 3
      [(google.api.field_behavior) = OPTIONAL];
}
547
// Confidential Instance Config for clusters using [Confidential
// VMs](https://cloud.google.com/compute/confidential-vm/docs).
message ConfidentialInstanceConfig {
  // Optional. Whether the instance should have confidential compute
  // enabled.
  bool enable_confidential_compute = 1 [(google.api.field_behavior) = OPTIONAL];
}
555
// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
  // Controls the use of preemptible instances within the group.
  enum Preemptibility {
    // Preemptibility is unspecified, the system will choose the
    // appropriate setting for each instance group.
    PREEMPTIBILITY_UNSPECIFIED = 0;

    // Instances are non-preemptible.
    //
    // This option is allowed for all instance groups and is the only valid
    // value for Master and Worker instance groups.
    NON_PREEMPTIBLE = 1;

    // Instances are
    // [preemptible](https://cloud.google.com/compute/docs/instances/preemptible).
    //
    // This option is allowed only for [secondary
    // worker](https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
    // groups.
    PREEMPTIBLE = 2;

    // Instances are [Spot
    // VMs](https://cloud.google.com/compute/docs/instances/spot).
    //
    // This option is allowed only for [secondary
    // worker](https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
    // groups. Spot VMs are the latest version of [preemptible
    // VMs](https://cloud.google.com/compute/docs/instances/preemptible), and
    // provide additional features.
    SPOT = 3;
  }

  // Optional. The number of VM instances in the instance group.
  // For [HA
  // cluster](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/high-availability)
  // [master_config][google.cloud.dataproc.v1.ClusterConfig.master_config]
  // groups, **must be set to 3**.
  // For standard cluster
  // [master_config][google.cloud.dataproc.v1.ClusterConfig.master_config]
  // groups, **must be set to 1**.
  int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The list of instance names. Dataproc derives the names
  // from `cluster_name`, `num_instances`, and the instance group.
  repeated string instance_names = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The Compute Engine image resource used for cluster instances.
  //
  // The URI can represent an image or image family.
  //
  // Image examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/[image-id]`
  // * `projects/[project_id]/global/images/[image-id]`
  // * `image-id`
  //
  // Image family examples. Dataproc will use the most recent
  // image from the family:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/family/[custom-image-family-name]`
  // * `projects/[project_id]/global/images/family/[custom-image-family-name]`
  //
  // If the URI is unspecified, it will be inferred from
  // `SoftwareConfig.image_version` or the system default.
  string image_uri = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine machine type used for cluster instances.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
  // * `projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
  // * `n1-standard-2`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the machine type
  // resource, for example, `n1-standard-2`.
  string machine_type_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Disk option config settings.
  DiskConfig disk_config = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Specifies that this instance group contains preemptible
  // instances.
  bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Specifies the preemptibility of the instance group.
  //
  // The default value for master and worker groups is
  // `NON_PREEMPTIBLE`. This default cannot be changed.
  //
  // The default value for secondary instances is
  // `PREEMPTIBLE`.
  Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The config for Compute Engine Instance Group
  // Manager that manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The Compute Engine accelerator configuration for these
  // instances.
  repeated AcceleratorConfig accelerators = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies the minimum CPU platform for the Instance Group.
  // See [Dataproc -> Minimum CPU
  // Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
  string min_cpu_platform = 9 [(google.api.field_behavior) = OPTIONAL];
}
670
// Specifies the resources used to actively manage an instance group.
message ManagedGroupConfig {
  // Output only. The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
681
// Specifies the type and number of accelerator cards attached to the instances
// of an instance group. See [GPUs on Compute
// Engine](https://cloud.google.com/compute/docs/gpus/).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Compute Engine
  // AcceleratorTypes](https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
  //
  // Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
  // * `projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
  // * `nvidia-tesla-k80`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the accelerator type
  // resource, for example, `nvidia-tesla-k80`.
  string accelerator_type_uri = 1;

  // The number of accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}
707
// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // NOTE: Field numbers below do not follow declaration order. They identify
  // the fields on the wire and must not be renumbered.

  // Optional. Type of the boot disk (default is "pd-standard").
  // Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive),
  // "pd-ssd" (Persistent Disk Solid State Drive),
  // or "pd-standard" (Persistent Disk Hard Disk Drive).
  // See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).
  string boot_disk_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Size of the boot disk, in GB (default is 500 GB).
  int32 boot_disk_size_gb = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of attached SSDs, from 0 to 8 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  //
  // Note: Local SSD options may vary by machine type and number of vCPUs
  // selected.
  int32 num_local_ssds = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Interface type of local SSDs (default is "scsi").
  // Valid values: "scsi" (Small Computer System Interface) or
  // "nvme" (Non-Volatile Memory Express).
  // See [local SSD
  // performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).
  string local_ssd_interface = 4 [(google.api.field_behavior) = OPTIONAL];
}
738
// Node group identification and configuration information.
message AuxiliaryNodeGroup {
  // Required. Node group configuration.
  NodeGroup node_group = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. A node group ID. Generated if not specified.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). It cannot begin or end with an
  // underscore or hyphen, and must be 3 to 33 characters long.
  string node_group_id = 2 [(google.api.field_behavior) = OPTIONAL];
}
751
// Dataproc Node Group.
// **The Dataproc `NodeGroup` resource is not related to the
// Dataproc [NodeGroupAffinity][google.cloud.dataproc.v1.NodeGroupAffinity]
// resource.**
message NodeGroup {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/NodeGroup"
    pattern: "projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{node_group}"
  };

  // Node group roles.
  enum Role {
    // Required unspecified role.
    ROLE_UNSPECIFIED = 0;

    // Job drivers run on the node group.
    DRIVER = 1;
  }

  // The Node group [resource name](https://aip.dev/122), in the format
  // `projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{node_group}`.
  string name = 1;

  // Required. Node group roles.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The node group instance group configuration.
  InstanceGroupConfig node_group_config = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node group labels.
  //
  // * Label **keys** must consist of from 1 to 63 characters and conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * Label **values** can be empty. If specified, they must consist of from
  //   1 to 63 characters and conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * The node group must have no more than 32 labels.
  map<string, string> labels = 4 [(google.api.field_behavior) = OPTIONAL];
}
791
// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // Required. Cloud Storage URI of the executable file.
  string executable_file = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Amount of time the executable has to complete. Default is
  // 10 minutes (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable has not completed by the end of the timeout
  // period.
  google.protobuf.Duration execution_timeout = 2
      [(google.api.field_behavior) = OPTIONAL];
}
808
// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  enum State {
    // NOTE: Values are not declared in numeric order (`ERROR_DUE_TO_UPDATE`
    // is 9). The numbers are part of the wire format and must not be changed.

    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    //
    // **Note:** The cluster state changes from `CREATING` to `RUNNING`
    // after the master node(s), first two primary worker nodes (and the last
    // primary worker node if primary workers > 2) are running.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster has encountered an error while being updated. Jobs can
    // be submitted to the cluster, but the cluster cannot be updated.
    ERROR_DUE_TO_UPDATE = 9;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;

    // The cluster is being stopped. It cannot be used.
    STOPPING = 6;

    // The cluster is currently stopped. It is not ready for use.
    STOPPED = 7;

    // The cluster is being started. It is not ready for use.
    STARTING = 8;
  }

  // The cluster substate.
  enum Substate {
    // The cluster substate is unknown.
    UNSPECIFIED = 0;

    // The cluster is known to be in an unhealthy state
    // (for example, critical daemons are not running or HDFS capacity is
    // exhausted).
    //
    // Applies to the `RUNNING` state.
    UNHEALTHY = 1;

    // The agent-reported status is out of date (may occur if
    // Dataproc loses communication with the agent).
    //
    // Applies to the `RUNNING` state.
    STALE_STATUS = 2;
  }

  // Output only. The cluster's state.
  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Details of the cluster's state.
  string detail = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. Time when this state was entered (see JSON representation of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp state_start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional state information that includes
  // status reported by the agent.
  Substate substate = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}
886
// Security-related configuration, including encryption, Kerberos, etc.
message SecurityConfig {
  // Optional. Kerberos-related configuration.
  KerberosConfig kerberos_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Identity-related configuration, including service-account-based
  // secure multi-tenancy user mappings.
  IdentityConfig identity_config = 2 [(google.api.field_behavior) = OPTIONAL];
}
896
// Specifies Kerberos-related configuration.
message KerberosConfig {
  // Optional. Flag to indicate whether to Kerberize the cluster (default:
  // false). Set this field to true to enable Kerberos on a cluster.
  bool enable_kerberos = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
  // root principal password.
  string root_principal_password_uri = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URI of the KMS key used to encrypt various sensitive
  // files.
  string kms_key_uri = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the keystore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string keystore_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the truststore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string truststore_uri = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
  // password to the user-provided keystore. For the self-signed certificate,
  // this password is generated by Dataproc.
  string keystore_password_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
  // password to the user-provided key. For the self-signed certificate, this
  // password is generated by Dataproc.
  string key_password_uri = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
  // password to the user-provided truststore. For the self-signed
  // certificate, this password is generated by Dataproc.
  string truststore_password_uri = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The remote realm the Dataproc on-cluster KDC will trust, should
  // the user enable cross-realm trust.
  string cross_realm_trust_realm = 9 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The KDC (IP or hostname) for the remote trusted realm in a
  // cross-realm trust relationship.
  string cross_realm_trust_kdc = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The admin server (IP or hostname) for the remote trusted realm
  // in a cross-realm trust relationship.
  string cross_realm_trust_admin_server = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
  // shared password between the on-cluster Kerberos realm and the remote
  // trusted realm, in a cross-realm trust relationship.
  string cross_realm_trust_shared_password_uri = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS-encrypted file containing the
  // master key of the KDC database.
  string kdc_db_key_uri = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The lifetime of the ticket granting ticket, in hours.
  // If not specified, or if the user specifies 0, the default value of 10
  // is used.
  int32 tgt_lifetime_hours = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The name of the on-cluster Kerberos realm.
  // If not specified, the uppercased domain of hostnames will be the realm.
  string realm = 15 [(google.api.field_behavior) = OPTIONAL];
}
969
// Identity-related configuration, including service-account-based
// secure multi-tenancy user mappings.
message IdentityConfig {
  // Required. Map of user to service account. Each key is a user and each
  // value is the service account mapped to that user.
  map<string, string> user_service_account_mapping = 1
      [(google.api.field_behavior) = REQUIRED];
}
977
// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // Optional. The version of software inside the cluster. It must be one of the
  // supported [Dataproc
  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
  // such as "1.2" (including a subminor version, such as "1.2.29"), or the
  // ["preview"
  // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
  // If unspecified, it defaults to the latest Debian version.
  string image_version = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, for example
  // `core:hadoop.tmp.dir`. The supported prefixes and the config files they
  // map to are:
  //
  // * capacity-scheduler: `capacity-scheduler.xml`
  // * core:   `core-site.xml`
  // * distcp: `distcp-default.xml`
  // * hdfs:   `hdfs-site.xml`
  // * hive:   `hive-site.xml`
  // * mapred: `mapred-site.xml`
  // * pig:    `pig.properties`
  // * spark:  `spark-defaults.conf`
  // * yarn:   `yarn-site.xml`
  //
  // For more information, see [Cluster
  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of components to activate on the cluster.
  repeated Component optional_components = 3
      [(google.api.field_behavior) = OPTIONAL];
}
1013
// Specifies the cluster auto-delete schedule configuration.
message LifecycleConfig {
  // Optional. The duration to keep the cluster alive while idling (when no jobs
  // are running). Passing this threshold will cause the cluster to be
  // deleted. Minimum value is 5 minutes; maximum value is 14 days (see JSON
  // representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Duration idle_delete_ttl = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Either the exact time at which the cluster should be deleted or
  // the cluster's maximum age. At most one of these fields can be set.
  oneof ttl {
    // Optional. The time when the cluster will be auto-deleted (see JSON
    // representation of
    // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Timestamp auto_delete_time = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The lifetime duration of the cluster. The cluster will be
    // auto-deleted at the end of this period. Minimum value is 10 minutes;
    // maximum value is 14 days (see JSON representation of
    // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Duration auto_delete_ttl = 3
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The time when the cluster became idle (most recent job
  // finished) and became eligible for deletion due to idleness (see JSON
  // representation of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp idle_start_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
1048
// Specifies a Metastore configuration.
message MetastoreConfig {
  // Required. Resource name of an existing Dataproc Metastore service
  // (a `metastore.googleapis.com/Service` resource).
  //
  // Example:
  //
  // * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`
  string dataproc_metastore_service = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "metastore.googleapis.com/Service"
    }
  ];
}
1063
// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
  // The HDFS metrics.
  map<string, int64> hdfs_metrics = 1;

  // The YARN metrics.
  map<string, int64> yarn_metrics = 2;
}
1075
// Dataproc metric config.
message DataprocMetricConfig {
  // A source for the collection of Dataproc OSS metrics (see
  // [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)).
  enum MetricSource {
    // Required unspecified metric source.
    METRIC_SOURCE_UNSPECIFIED = 0;

    // Default monitoring agent metrics. If this source is enabled,
    // Dataproc enables the monitoring agent in Compute Engine,
    // and collects default monitoring agent metrics, which are published
    // with an `agent.googleapis.com` prefix.
    MONITORING_AGENT_DEFAULTS = 1;

    // HDFS metric source.
    HDFS = 2;

    // Spark metric source.
    SPARK = 3;

    // YARN metric source.
    YARN = 4;

    // Spark History Server metric source.
    SPARK_HISTORY_SERVER = 5;

    // Hiveserver2 metric source.
    HIVESERVER2 = 6;

    // Hive Metastore metric source.
    HIVEMETASTORE = 7;
  }

  // A Dataproc OSS metric.
  message Metric {
    // Required. Default metrics are collected unless `metricOverrides` are
    // specified for the metric source (see
    // [Available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
    // for more information).
    MetricSource metric_source = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. Specify one or more
    // [available OSS metrics](https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics)
    // to collect for the metric source (for the `SPARK` metric source, any
    // [Spark metric](https://spark.apache.org/docs/latest/monitoring.html#metrics)
    // can be specified).
    //
    // Provide metrics in the following format:
    // <code><var>METRIC_SOURCE</var>:<var>INSTANCE</var>:<var>GROUP</var>:<var>METRIC</var></code>
    // Use camelcase as appropriate.
    //
    // Examples:
    //
    // ```
    // yarn:ResourceManager:QueueMetrics:AppsCompleted
    // spark:driver:DAGScheduler:job.allJobs
    // sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
    // hiveserver2:JVM:Memory:NonHeapMemoryUsage.used
    // ```
    //
    // Notes:
    //
    // * Only the specified overridden metrics will be collected for the
    //   metric source. For example, if one or more `spark:executive` metrics
    //   are listed as metric overrides, other `SPARK` metrics will not be
    //   collected. The collection of the default metrics for other OSS metric
    //   sources is unaffected. For example, if both `SPARK` and `YARN` metric
    //   sources are enabled, and overrides are provided for Spark metrics only,
    //   all default YARN metrics will be collected.
    repeated string metric_overrides = 2
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Required. Metrics sources to enable.
  repeated Metric metrics = 1 [(google.api.field_behavior) = REQUIRED];
}
1154
// A request to create a cluster.
message CreateClusterRequest {
  // NOTE: Field numbers below do not follow declaration order. They identify
  // the fields on the wire and must not be renumbered.

  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster to create.
  Cluster cluster = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server receives
  // two
  // [CreateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateClusterRequest)s
  // with the same ID, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Failure action when primary worker creation fails.
  FailureAction action_on_failed_primary_workers = 5
      [(google.api.field_behavior) = OPTIONAL];
}
1185
// A request to update a cluster.
message UpdateClusterRequest {
  // NOTE: Field numbers below do not follow declaration order. They identify
  // the fields on the wire and must not be renumbered.

  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The changes to the cluster.
  Cluster cluster = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Timeout for graceful YARN decommissioning. Graceful
  // decommissioning allows removing nodes from the cluster without
  // interrupting jobs in progress. Timeout specifies how long to wait for jobs
  // in progress to finish before forcefully removing nodes (and potentially
  // interrupting jobs). Default timeout is 0 (for forceful decommission), and
  // the maximum allowed timeout is 1 day (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Only supported on Dataproc image versions 1.2 and higher.
  google.protobuf.Duration graceful_decommission_timeout = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Required. Specifies the path, relative to `Cluster`, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the `update_mask` parameter would be
  // specified as `config.worker_config.num_instances`,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // Similarly, to change the number of preemptible workers in a cluster to 5,
  // the `update_mask` parameter would be
  // `config.secondary_worker_config.num_instances`, and the `PATCH` request
  // body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  //
  // <strong>Note:</strong> Currently, only the following fields can be updated:
  //
  //  <table>
  //  <tbody>
  //  <tr>
  //  <td><strong>Mask</strong></td>
  //  <td><strong>Purpose</strong></td>
  //  </tr>
  //  <tr>
  //  <td><strong><em>labels</em></strong></td>
  //  <td>Update labels</td>
  //  </tr>
  //  <tr>
  //  <td><strong><em>config.worker_config.num_instances</em></strong></td>
  //  <td>Resize primary worker group</td>
  //  </tr>
  //  <tr>
  //  <td><strong><em>config.secondary_worker_config.num_instances</em></strong></td>
  //  <td>Resize secondary worker group</td>
  //  </tr>
  //  <tr>
  //  <td><strong><em>config.autoscaling_config.policy_uri</em></strong></td>
  //  <td>Use, stop using, or change autoscaling policies</td>
  //  </tr>
  //  </tbody>
  //  </table>
  google.protobuf.FieldMask update_mask = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [UpdateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.UpdateClusterRequest)s
  // with the same ID, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 7 [(google.api.field_behavior) = OPTIONAL];
}
1281
// A request to stop a cluster.
message StopClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StopClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StopClusterRequest)s
  // with the same ID, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}
1312
// A request to start a cluster.
message StartClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StartClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StartClusterRequest)s
  // with the same ID, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}
1343
// A request to delete a cluster.
message DeleteClusterRequest {
  // NOTE: Field numbers below do not follow declaration order. They identify
  // the fields on the wire and must not be renumbered.

  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC should fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [DeleteClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.DeleteClusterRequest)s
  // with the same ID, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}
1374
// Request to get the resource representation for a cluster in a project.
message GetClusterRequest {
  // NOTE: Field numbers below do not follow declaration order. They identify
  // the fields on the wire and must not be renumbered.

  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
}
1387
// A request to list the clusters in a project.
message ListClustersRequest {
  // NOTE: Field numbers below do not follow declaration order. They identify
  // the fields on the wire and must not be renumbered.

  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 4 [(google.api.field_behavior) = REQUIRED];

  // Optional. A filter constraining the clusters to list. Filters are
  // case-sensitive and have the following syntax:
  //
  //     field = value [AND [field = value]] ...
  //
  // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`,
  // and `[KEY]` is a label key. **value** can be `*` to match all values.
  // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`,
  // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE`
  // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE`
  // contains the `DELETING` and `ERROR` states.
  // `clusterName` is the name of the cluster provided at creation time.
  // Only the logical `AND` operator is supported; space-separated items are
  // treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  //     status.state = ACTIVE AND clusterName = mycluster
  //     AND labels.env = staging AND labels.starred = *
  string filter = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard List page size.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The standard List page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];
}
1424
// The list of clusters in a project. Results may be split across several
// responses; see `next_page_token`.
message ListClustersResponse {
  // Output only. The clusters in the project.
  repeated Cluster clusters = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. This token is included in the response if there are more
  // results to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent `ListClustersRequest`.
  string next_page_token = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
}
1435
// A request to collect cluster diagnostic information.
message DiagnoseClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The name of the cluster to collect diagnostic information for.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];
}
1448
// The location of diagnostic output.
message DiagnoseClusterResults {
  // Output only. The Cloud Storage URI of the diagnostic output.
  // The output report is a plain text file with a summary of the collected
  // diagnostics.
  string output_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY];
}
1456
// Reservation affinity for consuming a zonal reservation.
message ReservationAffinity {
  // Indicates whether to consume capacity from a reservation or not.
  enum Type {
    // Default value, used when no reservation type has been set.
    TYPE_UNSPECIFIED = 0;

    // Do not consume from any allocated capacity.
    NO_RESERVATION = 1;

    // Consume any reservation available.
    ANY_RESERVATION = 2;

    // Must consume from a specific reservation. The `key` and `values` fields
    // must be specified to identify the reservations.
    SPECIFIC_RESERVATION = 3;
  }

  // Optional. Type of reservation to consume.
  Type consume_reservation_type = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Corresponds to the label key of the reservation resource.
  string key = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Corresponds to the label values of the reservation resource.
  repeated string values = 3 [(google.api.field_behavior) = OPTIONAL];
}
1483