// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/operations.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";
import "google/type/interval.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";

// The ClusterControllerService provides methods to manage clusters
// of Compute Engine instances.
service ClusterController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc CreateCluster(CreateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
    option (google.api.method_signature) = "project_id,region,cluster";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Updates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // The cluster must be in a
  // [`RUNNING`][google.cloud.dataproc.v1.ClusterStatus.State] state or an error
  // is returned.
  rpc UpdateCluster(UpdateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
    option (google.api.method_signature) =
        "project_id,region,cluster_name,cluster,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Stops a cluster in a project.
  rpc StopCluster(StopClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:stop"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Starts a cluster in a project.
  rpc StartCluster(StartClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:start"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Deletes a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc DeleteCluster(DeleteClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
  }

  // Lists all regions/{region}/clusters in a project alphabetically.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters"
    };
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Gets cluster diagnostic information. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // After the operation completes,
  // [Operation.response][google.longrunning.Operation.response]
  // contains
  // [DiagnoseClusterResults](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#diagnoseclusterresults).
  rpc DiagnoseCluster(DiagnoseClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "DiagnoseClusterResults"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }
}

// Describes the identifying information, config, and status of
// a Dataproc cluster.
message Cluster {
  // Required. The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name, which must be unique within a project.
  // The name must start with a lowercase letter, and can contain
  // up to 51 lowercase letters, numbers, and hyphens. It cannot end
  // with a hyphen. The name of a deleted cluster can be reused.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The cluster config for a cluster of Compute Engine Instances.
  // Note that Dataproc may set default values, and values may change
  // when clusters are updated.
  //
  // Exactly one of ClusterConfig or VirtualClusterConfig must be specified.
  ClusterConfig config = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The virtual cluster config is used when creating a Dataproc
  // cluster that does not directly control the underlying compute resources,
  // for example, when creating a [Dataproc-on-GKE
  // cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
  // Dataproc may set default values, and values may change when
  // clusters are updated. Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  VirtualClusterConfig virtual_cluster_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels to associate with this cluster.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a cluster.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Cluster status.
  ClusterStatus status = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous cluster status.
  repeated ClusterStatus status_history = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A cluster UUID (Universally Unique Identifier). Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Contains cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It
  // may be changed before final release.
  ClusterMetrics metrics = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The cluster config.
217message ClusterConfig { 218 // Optional. A Cloud Storage bucket used to stage job 219 // dependencies, config files, and job driver console output. 220 // If you do not specify a staging bucket, Cloud 221 // Dataproc will determine a Cloud Storage location (US, 222 // ASIA, or EU) for your cluster's staging bucket according to the 223 // Compute Engine zone where your cluster is deployed, and then create 224 // and manage this project-level, per-location bucket (see 225 // [Dataproc staging and temp 226 // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). 227 // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to 228 // a Cloud Storage bucket.** 229 string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL]; 230 231 // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs 232 // data, such as Spark and MapReduce history files. If you do not specify a 233 // temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or 234 // EU) for your cluster's temp bucket according to the Compute Engine zone 235 // where your cluster is deployed, and then create and manage this 236 // project-level, per-location bucket. The default bucket has a TTL of 90 237 // days, but you can use any TTL (or none) if you specify a bucket (see 238 // [Dataproc staging and temp 239 // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). 240 // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to 241 // a Cloud Storage bucket.** 242 string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL]; 243 244 // Optional. The shared Compute Engine config settings for 245 // all instances in a cluster. 246 GceClusterConfig gce_cluster_config = 8 247 [(google.api.field_behavior) = OPTIONAL]; 248 249 // Optional. The Compute Engine config settings for 250 // the cluster's master instance. 
251 InstanceGroupConfig master_config = 9 252 [(google.api.field_behavior) = OPTIONAL]; 253 254 // Optional. The Compute Engine config settings for 255 // the cluster's worker instances. 256 InstanceGroupConfig worker_config = 10 257 [(google.api.field_behavior) = OPTIONAL]; 258 259 // Optional. The Compute Engine config settings for 260 // a cluster's secondary worker instances 261 InstanceGroupConfig secondary_worker_config = 12 262 [(google.api.field_behavior) = OPTIONAL]; 263 264 // Optional. The config settings for cluster software. 265 SoftwareConfig software_config = 13 [(google.api.field_behavior) = OPTIONAL]; 266 267 // Optional. Commands to execute on each node after config is 268 // completed. By default, executables are run on master and all worker nodes. 269 // You can test a node's `role` metadata to run an executable on 270 // a master or worker node, as shown below using `curl` (you can also use 271 // `wget`): 272 // 273 // ROLE=$(curl -H Metadata-Flavor:Google 274 // http://metadata/computeMetadata/v1/instance/attributes/dataproc-role) 275 // if [[ "${ROLE}" == 'Master' ]]; then 276 // ... master specific actions ... 277 // else 278 // ... worker specific actions ... 279 // fi 280 repeated NodeInitializationAction initialization_actions = 11 281 [(google.api.field_behavior) = OPTIONAL]; 282 283 // Optional. Encryption settings for the cluster. 284 EncryptionConfig encryption_config = 15 285 [(google.api.field_behavior) = OPTIONAL]; 286 287 // Optional. Autoscaling config for the policy associated with the cluster. 288 // Cluster does not autoscale if this field is unset. 289 AutoscalingConfig autoscaling_config = 18 290 [(google.api.field_behavior) = OPTIONAL]; 291 292 // Optional. Security settings for the cluster. 293 SecurityConfig security_config = 16 [(google.api.field_behavior) = OPTIONAL]; 294 295 // Optional. Lifecycle setting for the cluster. 
296 LifecycleConfig lifecycle_config = 17 297 [(google.api.field_behavior) = OPTIONAL]; 298 299 // Optional. Port/endpoint configuration for this cluster 300 EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL]; 301 302 // Optional. Metastore configuration. 303 MetastoreConfig metastore_config = 20 304 [(google.api.field_behavior) = OPTIONAL]; 305 306 // Optional. The config for Dataproc metrics. 307 DataprocMetricConfig dataproc_metric_config = 23 308 [(google.api.field_behavior) = OPTIONAL]; 309 310 // Optional. The node group settings. 311 repeated AuxiliaryNodeGroup auxiliary_node_groups = 25 312 [(google.api.field_behavior) = OPTIONAL]; 313} 314 315// The Dataproc cluster config for a cluster that does not directly control the 316// underlying compute resources, such as a [Dataproc-on-GKE 317// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview). 318message VirtualClusterConfig { 319 // Optional. A Cloud Storage bucket used to stage job 320 // dependencies, config files, and job driver console output. 321 // If you do not specify a staging bucket, Cloud 322 // Dataproc will determine a Cloud Storage location (US, 323 // ASIA, or EU) for your cluster's staging bucket according to the 324 // Compute Engine zone where your cluster is deployed, and then create 325 // and manage this project-level, per-location bucket (see 326 // [Dataproc staging and temp 327 // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)). 328 // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to 329 // a Cloud Storage bucket.** 330 string staging_bucket = 1 [(google.api.field_behavior) = OPTIONAL]; 331 332 oneof infrastructure_config { 333 // Required. The configuration for running the Dataproc cluster on 334 // Kubernetes. 335 KubernetesClusterConfig kubernetes_cluster_config = 6 336 [(google.api.field_behavior) = REQUIRED]; 337 } 338 339 // Optional. 
Configuration of auxiliary services used by this cluster. 340 AuxiliaryServicesConfig auxiliary_services_config = 7 341 [(google.api.field_behavior) = OPTIONAL]; 342} 343 344// Auxiliary services configuration for a Cluster. 345message AuxiliaryServicesConfig { 346 // Optional. The Hive Metastore configuration for this workload. 347 MetastoreConfig metastore_config = 1 [(google.api.field_behavior) = OPTIONAL]; 348 349 // Optional. The Spark History Server configuration for the workload. 350 SparkHistoryServerConfig spark_history_server_config = 2 351 [(google.api.field_behavior) = OPTIONAL]; 352} 353 354// Endpoint config for this cluster 355message EndpointConfig { 356 // Output only. The map of port descriptions to URLs. Will only be populated 357 // if enable_http_port_access is true. 358 map<string, string> http_ports = 1 359 [(google.api.field_behavior) = OUTPUT_ONLY]; 360 361 // Optional. If true, enable http access to specific ports on the cluster 362 // from external sources. Defaults to false. 363 bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL]; 364} 365 366// Autoscaling Policy config associated with the cluster. 367message AutoscalingConfig { 368 // Optional. The autoscaling policy used by the cluster. 369 // 370 // Only resource names including projectid and location (region) are valid. 371 // Examples: 372 // 373 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` 374 // * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]` 375 // 376 // Note that the policy must be in the same project and Dataproc region. 377 string policy_uri = 1 [(google.api.field_behavior) = OPTIONAL]; 378} 379 380// Encryption settings for the cluster. 381message EncryptionConfig { 382 // Optional. The Cloud KMS key name to use for PD disk encryption for all 383 // instances in the cluster. 
384 string gce_pd_kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL]; 385} 386 387// Common config settings for resources of Compute Engine cluster 388// instances, applicable to all instances in the cluster. 389message GceClusterConfig { 390 // `PrivateIpv6GoogleAccess` controls whether and how Dataproc cluster nodes 391 // can communicate with Google Services through gRPC over IPv6. 392 // These values are directly mapped to corresponding values in the 393 // [Compute Engine Instance 394 // fields](https://cloud.google.com/compute/docs/reference/rest/v1/instances). 395 enum PrivateIpv6GoogleAccess { 396 // If unspecified, Compute Engine default behavior will apply, which 397 // is the same as 398 // [INHERIT_FROM_SUBNETWORK][google.cloud.dataproc.v1.GceClusterConfig.PrivateIpv6GoogleAccess.INHERIT_FROM_SUBNETWORK]. 399 PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED = 0; 400 401 // Private access to and from Google Services configuration 402 // inherited from the subnetwork configuration. This is the 403 // default Compute Engine behavior. 404 INHERIT_FROM_SUBNETWORK = 1; 405 406 // Enables outbound private IPv6 access to Google Services from the Dataproc 407 // cluster. 408 OUTBOUND = 2; 409 410 // Enables bidirectional private IPv6 access between Google Services and the 411 // Dataproc cluster. 412 BIDIRECTIONAL = 3; 413 } 414 415 // Optional. The Compute Engine zone where the Dataproc cluster will be 416 // located. If omitted, the service will pick a zone in the cluster's Compute 417 // Engine region. On a get request, zone will always be present. 418 // 419 // A full URL, partial URI, or short name are valid. Examples: 420 // 421 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]` 422 // * `projects/[project_id]/zones/[zone]` 423 // * `[zone]` 424 string zone_uri = 1 [(google.api.field_behavior) = OPTIONAL]; 425 426 // Optional. The Compute Engine network to be used for machine 427 // communications. 
Cannot be specified with subnetwork_uri. If neither 428 // `network_uri` nor `subnetwork_uri` is specified, the "default" network of 429 // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see 430 // [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for 431 // more information). 432 // 433 // A full URL, partial URI, or short name are valid. Examples: 434 // 435 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default` 436 // * `projects/[project_id]/global/networks/default` 437 // * `default` 438 string network_uri = 2 [(google.api.field_behavior) = OPTIONAL]; 439 440 // Optional. The Compute Engine subnetwork to be used for machine 441 // communications. Cannot be specified with network_uri. 442 // 443 // A full URL, partial URI, or short name are valid. Examples: 444 // 445 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/[region]/subnetworks/sub0` 446 // * `projects/[project_id]/regions/[region]/subnetworks/sub0` 447 // * `sub0` 448 string subnetwork_uri = 6 [(google.api.field_behavior) = OPTIONAL]; 449 450 // Optional. If true, all instances in the cluster will only have internal IP 451 // addresses. By default, clusters are not restricted to internal IP 452 // addresses, and will have ephemeral external IP addresses assigned to each 453 // instance. This `internal_ip_only` restriction can only be enabled for 454 // subnetwork enabled networks, and all off-cluster dependencies must be 455 // configured to be accessible without external IP addresses. 456 optional bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL]; 457 458 // Optional. The type of IPv6 access for a cluster. 459 PrivateIpv6GoogleAccess private_ipv6_google_access = 12 460 [(google.api.field_behavior) = OPTIONAL]; 461 462 // Optional. 
The [Dataproc service 463 // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc) 464 // (also see [VM Data Plane 465 // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity)) 466 // used by Dataproc cluster VM instances to access Google Cloud Platform 467 // services. 468 // 469 // If not specified, the 470 // [Compute Engine default service 471 // account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account) 472 // is used. 473 string service_account = 8 [(google.api.field_behavior) = OPTIONAL]; 474 475 // Optional. The URIs of service account scopes to be included in 476 // Compute Engine instances. The following base set of scopes is always 477 // included: 478 // 479 // * https://www.googleapis.com/auth/cloud.useraccounts.readonly 480 // * https://www.googleapis.com/auth/devstorage.read_write 481 // * https://www.googleapis.com/auth/logging.write 482 // 483 // If no scopes are specified, the following defaults are also provided: 484 // 485 // * https://www.googleapis.com/auth/bigquery 486 // * https://www.googleapis.com/auth/bigtable.admin.table 487 // * https://www.googleapis.com/auth/bigtable.data 488 // * https://www.googleapis.com/auth/devstorage.full_control 489 repeated string service_account_scopes = 3 490 [(google.api.field_behavior) = OPTIONAL]; 491 492 // The Compute Engine tags to add to all instances (see [Tagging 493 // instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)). 494 repeated string tags = 4; 495 496 // Optional. The Compute Engine metadata entries to add to all instances (see 497 // [Project and instance 498 // metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)). 499 map<string, string> metadata = 5 [(google.api.field_behavior) = OPTIONAL]; 500 501 // Optional. 
Reservation Affinity for consuming Zonal reservation. 502 ReservationAffinity reservation_affinity = 11 503 [(google.api.field_behavior) = OPTIONAL]; 504 505 // Optional. Node Group Affinity for sole-tenant clusters. 506 NodeGroupAffinity node_group_affinity = 13 507 [(google.api.field_behavior) = OPTIONAL]; 508 509 // Optional. Shielded Instance Config for clusters using [Compute Engine 510 // Shielded 511 // VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm). 512 ShieldedInstanceConfig shielded_instance_config = 14 513 [(google.api.field_behavior) = OPTIONAL]; 514 515 // Optional. Confidential Instance Config for clusters using [Confidential 516 // VMs](https://cloud.google.com/compute/confidential-vm/docs). 517 ConfidentialInstanceConfig confidential_instance_config = 15 518 [(google.api.field_behavior) = OPTIONAL]; 519} 520 521// Node Group Affinity for clusters using sole-tenant node groups. 522// **The Dataproc `NodeGroupAffinity` resource is not related to the 523// Dataproc [NodeGroup][google.cloud.dataproc.v1.NodeGroup] resource.** 524message NodeGroupAffinity { 525 // Required. The URI of a 526 // sole-tenant [node group 527 // resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups) 528 // that the cluster will be created on. 529 // 530 // A full URL, partial URI, or node group name are valid. Examples: 531 // 532 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/nodeGroups/node-group-1` 533 // * `projects/[project_id]/zones/[zone]/nodeGroups/node-group-1` 534 // * `node-group-1` 535 string node_group_uri = 1 [(google.api.field_behavior) = REQUIRED]; 536} 537 538// Shielded Instance Config for clusters using [Compute Engine Shielded 539// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm). 540message ShieldedInstanceConfig { 541 // Optional. Defines whether instances have Secure Boot enabled. 
542 optional bool enable_secure_boot = 1 [(google.api.field_behavior) = OPTIONAL]; 543 544 // Optional. Defines whether instances have the vTPM enabled. 545 optional bool enable_vtpm = 2 [(google.api.field_behavior) = OPTIONAL]; 546 547 // Optional. Defines whether instances have integrity monitoring enabled. 548 optional bool enable_integrity_monitoring = 3 549 [(google.api.field_behavior) = OPTIONAL]; 550} 551 552// Confidential Instance Config for clusters using [Confidential 553// VMs](https://cloud.google.com/compute/confidential-vm/docs) 554message ConfidentialInstanceConfig { 555 // Optional. Defines whether the instance should have confidential compute 556 // enabled. 557 bool enable_confidential_compute = 1 [(google.api.field_behavior) = OPTIONAL]; 558} 559 560// The config settings for Compute Engine resources in 561// an instance group, such as a master or worker group. 562message InstanceGroupConfig { 563 // Controls the use of preemptible instances within the group. 564 enum Preemptibility { 565 // Preemptibility is unspecified, the system will choose the 566 // appropriate setting for each instance group. 567 PREEMPTIBILITY_UNSPECIFIED = 0; 568 569 // Instances are non-preemptible. 570 // 571 // This option is allowed for all instance groups and is the only valid 572 // value for Master and Worker instance groups. 573 NON_PREEMPTIBLE = 1; 574 575 // Instances are [preemptible] 576 // (https://cloud.google.com/compute/docs/instances/preemptible). 577 // 578 // This option is allowed only for [secondary worker] 579 // (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms) 580 // groups. 581 PREEMPTIBLE = 2; 582 583 // Instances are [Spot VMs] 584 // (https://cloud.google.com/compute/docs/instances/spot). 585 // 586 // This option is allowed only for [secondary worker] 587 // (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms) 588 // groups. 
Spot VMs are the latest version of [preemptible VMs] 589 // (https://cloud.google.com/compute/docs/instances/preemptible), and 590 // provide additional features. 591 SPOT = 3; 592 } 593 594 // Optional. The number of VM instances in the instance group. 595 // For [HA 596 // cluster](/dataproc/docs/concepts/configuring-clusters/high-availability) 597 // [master_config](#FIELDS.master_config) groups, **must be set to 3**. 598 // For standard cluster [master_config](#FIELDS.master_config) groups, 599 // **must be set to 1**. 600 int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL]; 601 602 // Output only. The list of instance names. Dataproc derives the names 603 // from `cluster_name`, `num_instances`, and the instance group. 604 repeated string instance_names = 2 605 [(google.api.field_behavior) = OUTPUT_ONLY]; 606 607 // Output only. List of references to Compute Engine instances. 608 repeated InstanceReference instance_references = 11 609 [(google.api.field_behavior) = OUTPUT_ONLY]; 610 611 // Optional. The Compute Engine image resource used for cluster instances. 612 // 613 // The URI can represent an image or image family. 614 // 615 // Image examples: 616 // 617 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/[image-id]` 618 // * `projects/[project_id]/global/images/[image-id]` 619 // * `image-id` 620 // 621 // Image family examples. Dataproc will use the most recent 622 // image from the family: 623 // 624 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/family/[custom-image-family-name]` 625 // * `projects/[project_id]/global/images/family/[custom-image-family-name]` 626 // 627 // If the URI is unspecified, it will be inferred from 628 // `SoftwareConfig.image_version` or the system default. 629 string image_uri = 3 [(google.api.field_behavior) = OPTIONAL]; 630 631 // Optional. The Compute Engine machine type used for cluster instances. 
632 // 633 // A full URL, partial URI, or short name are valid. Examples: 634 // 635 // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2` 636 // * `projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2` 637 // * `n1-standard-2` 638 // 639 // **Auto Zone Exception**: If you are using the Dataproc 640 // [Auto Zone 641 // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement) 642 // feature, you must use the short name of the machine type 643 // resource, for example, `n1-standard-2`. 644 string machine_type_uri = 4 [(google.api.field_behavior) = OPTIONAL]; 645 646 // Optional. Disk option config settings. 647 DiskConfig disk_config = 5 [(google.api.field_behavior) = OPTIONAL]; 648 649 // Output only. Specifies that this instance group contains preemptible 650 // instances. 651 bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; 652 653 // Optional. Specifies the preemptibility of the instance group. 654 // 655 // The default value for master and worker groups is 656 // `NON_PREEMPTIBLE`. This default cannot be changed. 657 // 658 // The default value for secondary instances is 659 // `PREEMPTIBLE`. 660 Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL]; 661 662 // Output only. The config for Compute Engine Instance Group 663 // Manager that manages this group. 664 // This is only used for preemptible instance groups. 665 ManagedGroupConfig managed_group_config = 7 666 [(google.api.field_behavior) = OUTPUT_ONLY]; 667 668 // Optional. The Compute Engine accelerator configuration for these 669 // instances. 670 repeated AcceleratorConfig accelerators = 8 671 [(google.api.field_behavior) = OPTIONAL]; 672 673 // Optional. Specifies the minimum cpu platform for the Instance Group. 674 // See [Dataproc -> Minimum CPU 675 // Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu). 
string min_cpu_platform = 9 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The minimum number of primary worker instances to create.
  // If `min_num_instances` is set, cluster creation will succeed if
  // the number of primary workers created is at least equal to the
  // `min_num_instances` number.
  //
  // Example: Cluster creation request with `num_instances` = `5` and
  // `min_num_instances` = `3`:
  //
  // *  If 4 VMs are created and 1 instance fails,
  //    the failed VM is deleted. The cluster is
  //    resized to 4 instances and placed in a `RUNNING` state.
  // *  If 2 instances are created and 3 instances fail,
  //    the cluster is placed in an `ERROR` state. The failed VMs
  //    are not deleted.
  int32 min_num_instances = 12 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Instance flexibility Policy allowing a mixture of VM shapes and
  // provisioning models.
  InstanceFlexibilityPolicy instance_flexibility_policy = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Configuration to handle the startup of instances during cluster
  // create and update process.
  StartupConfig startup_config = 14 [(google.api.field_behavior) = OPTIONAL];
}

// Configuration to handle the startup of instances during cluster create and
// update process.
message StartupConfig {
  // Optional. The config setting to enable cluster creation or update to
  // succeed only after `required_registration_fraction` of instances are up
  // and running. This configuration currently applies only to secondary
  // workers. The cluster will fail if `required_registration_fraction` of
  // instances are not available. This includes instance creation, agent
  // registration, and service registration (if enabled).
  optional double required_registration_fraction = 1
      [(google.api.field_behavior) = OPTIONAL];
}

// A reference to a Compute Engine instance.
message InstanceReference {
  // The user-friendly name of the Compute Engine instance.
  string instance_name = 1;

  // The unique identifier of the Compute Engine instance.
  string instance_id = 2;

  // The public RSA key used for sharing data with this instance.
  string public_key = 3;

  // The public ECIES key used for sharing data with this instance.
  string public_ecies_key = 4;
}

// Specifies the resources used to actively manage an instance group.
message ManagedGroupConfig {
  // Output only. The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The partial URI to the instance group manager for this group.
  // E.g. projects/my-project/regions/us-central1/instanceGroupManagers/my-igm.
  string instance_group_manager_uri = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Instance flexibility Policy allowing a mixture of VM shapes and provisioning
// models.
message InstanceFlexibilityPolicy {
  // Defines machine types and a rank to which the machine types belong.
  message InstanceSelection {
    // Optional. Full machine-type names, e.g. "n1-standard-16".
    repeated string machine_types = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Preference of this instance selection. Lower number means
    // higher preference. Dataproc will first try to create a VM based on the
    // machine-type with priority rank and fall back to the next rank based on
    // availability. Machine types and instance selections with the same
    // priority have the same preference.
    int32 rank = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Defines a mapping from machine types to the number of VMs that are created
  // with each machine type.
  message InstanceSelectionResult {
    // Output only. Full machine-type names, e.g. "n1-standard-16".
    optional string machine_type = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Number of VMs provisioned with the machine_type.
    optional int32 vm_count = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Optional. List of instance selection options that the group will use when
  // creating new VMs.
  repeated InstanceSelection instance_selection_list = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. A list of instance selection results in the group.
  repeated InstanceSelectionResult instance_selection_results = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Specifies the type and number of accelerator cards attached to the instances
// of an instance group. See [GPUs on Compute
// Engine](https://cloud.google.com/compute/docs/gpus/).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Compute Engine
  // AcceleratorTypes](https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
//
  // Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
  // * `projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
  // * `nvidia-tesla-k80`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the accelerator type
  // resource, for example, `nvidia-tesla-k80`.
  string accelerator_type_uri = 1;

  // The number of the accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}

// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // Optional. Type of the boot disk (default is "pd-standard").
  // Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive),
  // "pd-ssd" (Persistent Disk Solid State Drive),
  // or "pd-standard" (Persistent Disk Hard Disk Drive).
  // See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).
  string boot_disk_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Size in GB of the boot disk (default is 500GB).
  int32 boot_disk_size_gb = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of attached SSDs, from 0 to 8 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  //
  // Note: Local SSD options may vary by machine type and number of vCPUs
  // selected.
  int32 num_local_ssds = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Interface type of local SSDs (default is "scsi").
  // Valid values: "scsi" (Small Computer System Interface),
  // "nvme" (Non-Volatile Memory Express).
  // See [local SSD
  // performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).
  string local_ssd_interface = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Node group identification and configuration information.
message AuxiliaryNodeGroup {
  // Required. Node group configuration.
  NodeGroup node_group = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. A node group ID. Generated if not specified.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). Cannot begin or end with underscore
  // or hyphen. Must consist of between 3 and 33 characters.
  string node_group_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc Node Group.
// **The Dataproc `NodeGroup` resource is not related to the
// Dataproc [NodeGroupAffinity][google.cloud.dataproc.v1.NodeGroupAffinity]
// resource.**
message NodeGroup {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/NodeGroup"
    pattern: "projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{node_group}"
  };

  // Node pool roles.
  enum Role {
    // Required unspecified role.
    ROLE_UNSPECIFIED = 0;

    // Job drivers run on the node pool.
    DRIVER = 1;
  }

  // The Node group [resource name](https://aip.dev/122).
  string name = 1;

  // Required. Node group roles.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The node group instance group configuration.
  InstanceGroupConfig node_group_config = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node group labels.
//
  // * Label **keys** must consist of between 1 and 63 characters and conform
  //   to [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * Label **values** can be empty. If specified, they must consist of
  //   between 1 and 63 characters and conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * The node group must have no more than 32 labels.
  map<string, string> labels = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // Required. Cloud Storage URI of executable file.
  string executable_file = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Amount of time executable has to complete. Default is
  // 10 minutes (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Cluster creation fails with an explanatory error message (the
  // name of the executable that caused the error and the exceeded timeout
  // period) if the executable is not completed at end of the timeout period.
  google.protobuf.Duration execution_timeout = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// The status of a cluster and its instances.
message ClusterStatus {
  // The cluster state.
  enum State {
    // The cluster state is unknown.
    UNKNOWN = 0;

    // The cluster is being created and set up. It is not ready for use.
    CREATING = 1;

    // The cluster is currently running and healthy. It is ready for use.
    //
    // **Note:** The cluster state changes from "creating" to "running" status
    // after the master node(s), first two primary worker nodes (and the last
    // primary worker node if primary workers > 2) are running.
    RUNNING = 2;

    // The cluster encountered an error. It is not ready for use.
    ERROR = 3;

    // The cluster has encountered an error while being updated. Jobs can
    // be submitted to the cluster, but the cluster cannot be updated.
    ERROR_DUE_TO_UPDATE = 9;

    // The cluster is being deleted. It cannot be used.
    DELETING = 4;

    // The cluster is being updated. It continues to accept and process jobs.
    UPDATING = 5;

    // The cluster is being stopped. It cannot be used.
    STOPPING = 6;

    // The cluster is currently stopped. It is not ready for use.
    STOPPED = 7;

    // The cluster is being started. It is not ready for use.
    STARTING = 8;

    // The cluster is being repaired. It is not ready for use.
    REPAIRING = 10;
  }

  // The cluster substate.
  enum Substate {
    // The cluster substate is unknown.
    UNSPECIFIED = 0;

    // The cluster is known to be in an unhealthy state
    // (for example, critical daemons are not running or HDFS capacity is
    // exhausted).
    //
    // Applies to RUNNING state.
    UNHEALTHY = 1;

    // The agent-reported status is out of date (may occur if
    // Dataproc loses communication with Agent).
    //
    // Applies to RUNNING state.
    STALE_STATUS = 2;
  }

  // Output only. The cluster's state.
  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Details of cluster's state.
  string detail = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. Time when this state was entered (see JSON representation of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp state_start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional state information that includes
  // status reported by the agent.
Substate substate = 4 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Security related configuration, including encryption, Kerberos, etc.
message SecurityConfig {
  // Optional. Kerberos related configuration.
  KerberosConfig kerberos_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Identity related configuration, including service account based
  // secure multi-tenancy user mappings.
  IdentityConfig identity_config = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies Kerberos related configuration.
message KerberosConfig {
  // Optional. Flag to indicate whether to Kerberize the cluster (default:
  // false). Set this field to true to enable Kerberos on a cluster.
  bool enable_kerberos = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the root
  // principal password.
  string root_principal_password_uri = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URI of the KMS key used to encrypt various sensitive
  // files.
  string kms_key_uri = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the keystore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string keystore_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of the truststore file used for SSL
  // encryption. If not provided, Dataproc will provide a self-signed
  // certificate.
  string truststore_uri = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user provided keystore. For the self-signed certificate,
  // this password is generated by Dataproc.
  string keystore_password_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user provided key. For the self-signed certificate, this
  // password is generated by Dataproc.
  string key_password_uri = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // password to the user provided truststore. For the self-signed certificate,
  // this password is generated by Dataproc.
  string truststore_password_uri = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The remote realm the Dataproc on-cluster KDC will trust, should
  // the user enable cross realm trust.
  string cross_realm_trust_realm = 9 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The KDC (IP or hostname) for the remote trusted realm in a cross
  // realm trust relationship.
  string cross_realm_trust_kdc = 10 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The admin server (IP or hostname) for the remote trusted realm in
  // a cross realm trust relationship.
  string cross_realm_trust_admin_server = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // shared password between the on-cluster Kerberos realm and the remote
  // trusted realm, in a cross realm trust relationship.
  string cross_realm_trust_shared_password_uri = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Cloud Storage URI of a KMS encrypted file containing the
  // master key of the KDC database.
  string kdc_db_key_uri = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The lifetime of the ticket granting ticket, in hours.
  // If not specified, or user specifies 0, then default value 10
  // will be used.
int32 tgt_lifetime_hours = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The name of the on-cluster Kerberos realm.
  // If not specified, the uppercased domain of hostnames will be the realm.
  string realm = 15 [(google.api.field_behavior) = OPTIONAL];
}

// Identity related configuration, including service account based
// secure multi-tenancy user mappings.
message IdentityConfig {
  // Required. Map of user to service account.
  map<string, string> user_service_account_mapping = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Specifies the selection and config of software inside the cluster.
message SoftwareConfig {
  // Optional. The version of software inside the cluster. It must be one of the
  // supported [Dataproc
  // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions),
  // such as "1.2" (including a subminor version, such as "1.2.29"), or the
  // ["preview"
  // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions).
  // If unspecified, it defaults to the latest Debian version.
  string image_version = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The properties to set on daemon config files.
  //
  // Property keys are specified in `prefix:property` format, for example
  // `core:hadoop.tmp.dir`. The following are supported prefixes
  // and their mappings:
  //
  // * capacity-scheduler: `capacity-scheduler.xml`
  // * core:   `core-site.xml`
  // * distcp: `distcp-default.xml`
  // * hdfs:   `hdfs-site.xml`
  // * hive:   `hive-site.xml`
  // * mapred: `mapred-site.xml`
  // * pig:    `pig.properties`
  // * spark:  `spark-defaults.conf`
  // * yarn:   `yarn-site.xml`
  //
  // For more information, see [Cluster
  // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
  map<string, string> properties = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The set of components to activate on the cluster.
  repeated Component optional_components = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Specifies the cluster auto-delete schedule configuration.
message LifecycleConfig {
  // Optional. The duration to keep the cluster alive while idling (when no jobs
  // are running). Passing this threshold will cause the cluster to be
  // deleted. Minimum value is 5 minutes; maximum value is 14 days (see JSON
  // representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Duration idle_delete_ttl = 1
      [(google.api.field_behavior) = OPTIONAL];

  // Either the exact time the cluster should be deleted at or
  // the cluster maximum age.
  oneof ttl {
    // Optional. The time when cluster will be auto-deleted (see JSON
    // representation of
    // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Timestamp auto_delete_time = 2
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The lifetime duration of cluster. The cluster will be
    // auto-deleted at the end of this period.
// Minimum value is 10 minutes;
    // maximum value is 14 days (see JSON representation of
    // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
    google.protobuf.Duration auto_delete_ttl = 3
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The time when cluster became idle (most recent job finished)
  // and became eligible for deletion due to idleness (see JSON representation
  // of
  // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  google.protobuf.Timestamp idle_start_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Specifies a Metastore configuration.
message MetastoreConfig {
  // Required. Resource name of an existing Dataproc Metastore service.
  //
  // Example:
  //
  // * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]`
  string dataproc_metastore_service = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "metastore.googleapis.com/Service"
    }
  ];
}

// Contains cluster daemon metrics, such as HDFS and YARN stats.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message ClusterMetrics {
  // The HDFS metrics.
  map<string, int64> hdfs_metrics = 1;

  // YARN metrics.
  map<string, int64> yarn_metrics = 2;
}

// Dataproc metric config.
message DataprocMetricConfig {
  // A source for the collection of Dataproc custom metrics (see
  // [Custom
  // metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)).
  enum MetricSource {
    // Required unspecified metric source.
    METRIC_SOURCE_UNSPECIFIED = 0;

    // Monitoring agent metrics. If this source is enabled,
    // Dataproc enables the monitoring agent in Compute Engine,
    // and collects monitoring agent metrics, which are published
    // with an `agent.googleapis.com` prefix.
    MONITORING_AGENT_DEFAULTS = 1;

    // HDFS metric source.
    HDFS = 2;

    // Spark metric source.
    SPARK = 3;

    // YARN metric source.
    YARN = 4;

    // Spark History Server metric source.
    SPARK_HISTORY_SERVER = 5;

    // Hiveserver2 metric source.
    HIVESERVER2 = 6;

    // Hive Metastore metric source.
    HIVEMETASTORE = 7;
  }

  // A Dataproc custom metric.
  message Metric {
    // Required. A standard set of metrics is collected unless `metricOverrides`
    // are specified for the metric source (see
    // [Custom
    // metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
    // for more information).
    MetricSource metric_source = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. Specify one or more
    // [Custom
    // metrics](https://cloud.google.com/dataproc/docs/guides/dataproc-metrics#custom_metrics)
    // to collect for the metric source (for the `SPARK` metric source, any
    // [Spark
    // metric](https://spark.apache.org/docs/latest/monitoring.html#metrics)
    // can be specified).
    //
    // Provide metrics in the following format:
    // <code><var>METRIC_SOURCE</var>:<var>INSTANCE</var>:<var>GROUP</var>:<var>METRIC</var></code>
    // Use camelcase as appropriate.
    //
    // Examples:
    //
    // ```
    // yarn:ResourceManager:QueueMetrics:AppsCompleted
    // spark:driver:DAGScheduler:job.allJobs
    // sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed
    // hiveserver2:JVM:Memory:NonHeapMemoryUsage.used
    // ```
    //
    // Notes:
    //
    // * Only the specified overridden metrics are collected for the
    //   metric source. For example, if one or more `spark:executor` metrics
    //   are listed as metric overrides, other `SPARK` metrics are not
    //   collected. The collection of the metrics for other enabled custom
    //   metric sources is unaffected. For example, if both `SPARK` and `YARN`
    //   metric sources are enabled, and overrides are provided for Spark
    //   metrics only, all YARN metrics are collected.
    repeated string metric_overrides = 2
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Required. Metrics sources to enable.
  repeated Metric metrics = 1 [(google.api.field_behavior) = REQUIRED];
}

// A request to create a cluster.
message CreateClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster to create.
  Cluster cluster = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server receives
  // two
  // [CreateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional.
// Failure action when primary worker creation fails.
  FailureAction action_on_failed_primary_workers = 5
      [(google.api.field_behavior) = OPTIONAL];
}

// A request to update a cluster.
message UpdateClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 5 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The changes to the cluster.
  Cluster cluster = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Timeout for graceful YARN decommissioning. Graceful
  // decommissioning allows removing nodes from the cluster without
  // interrupting jobs in progress. Timeout specifies how long to wait for jobs
  // in progress to finish before forcefully removing nodes (and potentially
  // interrupting jobs). Default timeout is 0 (for forceful decommission), and
  // the maximum allowed timeout is 1 day. (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
  //
  // Only supported on Dataproc image versions 1.2 and higher.
  google.protobuf.Duration graceful_decommission_timeout = 6
      [(google.api.field_behavior) = OPTIONAL];

  // Required. Specifies the path, relative to `Cluster`, of
  // the field to update. For example, to change the number of workers
  // in a cluster to 5, the `update_mask` parameter would be
  // specified as `config.worker_config.num_instances`,
  // and the `PATCH` request body would specify the new value, as follows:
  //
  //     {
  //       "config":{
  //         "workerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  // Similarly, to change the number of preemptible workers in a cluster to 5,
  // the `update_mask` parameter would be
  // `config.secondary_worker_config.num_instances`, and the `PATCH` request
  // body would be set as follows:
  //
  //     {
  //       "config":{
  //         "secondaryWorkerConfig":{
  //           "numInstances":"5"
  //         }
  //       }
  //     }
  // <strong>Note:</strong> Currently, only the following fields can be updated:
  //
  //  <table>
  //  <tbody>
  //  <tr>
  //  <td><strong>Mask</strong></td>
  //  <td><strong>Purpose</strong></td>
  //  </tr>
  //  <tr>
  //  <td><strong><em>labels</em></strong></td>
  //  <td>Update labels</td>
  //  </tr>
  //  <tr>
  //  <td><strong><em>config.worker_config.num_instances</em></strong></td>
  //  <td>Resize primary worker group</td>
  //  </tr>
  //  <tr>
  //  <td><strong><em>config.secondary_worker_config.num_instances</em></strong></td>
  //  <td>Resize secondary worker group</td>
  //  </tr>
  //  <tr>
  //  <td>config.autoscaling_config.policy_uri</td><td>Use, stop using, or
  //  change autoscaling policies</td>
  //  </tr>
  //  </tbody>
  //  </table>
  google.protobuf.FieldMask update_mask = 4
      [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [UpdateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.UpdateClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A request to stop a cluster.
message StopClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StopClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StopClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
//
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}

// A request to start a cluster.
message StartClusterRequest {
  // Required. The ID of the Google Cloud Platform project the
  // cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC will fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [StartClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StartClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}

// A request to delete a cluster.
message DeleteClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Specifying the `cluster_uuid` means the RPC should fail
  // (with error NOT_FOUND) if a cluster with the specified UUID does not exist.
  string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the server
  // receives two
  // [DeleteClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.DeleteClusterRequest)s
  // with the same id, then the second request will be ignored and the
  // first [google.longrunning.Operation][google.longrunning.Operation] created
  // and stored in the backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Request to get the resource representation for a cluster in a project.
message GetClusterRequest {
  // Required. The ID of the Google Cloud Platform project that the cluster
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name.
1491 string cluster_name = 2 [(google.api.field_behavior) = REQUIRED]; 1492} 1493 1494// A request to list the clusters in a project. 1495message ListClustersRequest { 1496 // Required. The ID of the Google Cloud Platform project that the cluster 1497 // belongs to. 1498 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1499 1500 // Required. The Dataproc region in which to handle the request. 1501 string region = 4 [(google.api.field_behavior) = REQUIRED]; 1502 1503 // Optional. A filter constraining the clusters to list. Filters are 1504 // case-sensitive and have the following syntax: 1505 // 1506 // field = value [AND [field = value]] ... 1507 // 1508 // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`, 1509 // and `[KEY]` is a label key. **value** can be `*` to match all values. 1510 // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`, 1511 // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE` 1512 // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE` 1513 // contains the `DELETING` and `ERROR` states. 1514 // `clusterName` is the name of the cluster provided at creation time. 1515 // Only the logical `AND` operator is supported; space-separated items are 1516 // treated as having an implicit `AND` operator. 1517 // 1518 // Example filter: 1519 // 1520 // status.state = ACTIVE AND clusterName = mycluster 1521 // AND labels.env = staging AND labels.starred = * 1522 string filter = 5 [(google.api.field_behavior) = OPTIONAL]; 1523 1524 // Optional. The standard List page size. 1525 int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL]; 1526 1527 // Optional. The standard List page token. 1528 string page_token = 3 [(google.api.field_behavior) = OPTIONAL]; 1529} 1530 1531// The list of all clusters in a project. 1532message ListClustersResponse { 1533 // Output only. The clusters in the project. 
1534 repeated Cluster clusters = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; 1535 1536 // Output only. This token is included in the response if there are more 1537 // results to fetch. To fetch additional results, provide this value as the 1538 // `page_token` in a subsequent `ListClustersRequest`. 1539 string next_page_token = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; 1540} 1541 1542// A request to collect cluster diagnostic information. 1543message DiagnoseClusterRequest { 1544 // Required. The ID of the Google Cloud Platform project that the cluster 1545 // belongs to. 1546 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1547 1548 // Required. The Dataproc region in which to handle the request. 1549 string region = 3 [(google.api.field_behavior) = REQUIRED]; 1550 1551 // Required. The cluster name. 1552 string cluster_name = 2 [(google.api.field_behavior) = REQUIRED]; 1553 1554 // Optional. The output Cloud Storage directory for the diagnostic 1555 // tarball. If not specified, a task-specific directory in the cluster's 1556 // staging bucket will be used. 1557 string tarball_gcs_dir = 4 [(google.api.field_behavior) = OPTIONAL]; 1558 1559 // Optional. Time interval in which diagnosis should be carried out on the 1560 // cluster. 1561 google.type.Interval diagnosis_interval = 6 1562 [(google.api.field_behavior) = OPTIONAL]; 1563 1564 // Optional. Specifies a list of jobs on which diagnosis is to be performed. 1565 // Format: projects/{project}/regions/{region}/jobs/{job} 1566 repeated string jobs = 10 [(google.api.field_behavior) = OPTIONAL]; 1567 1568 // Optional. Specifies a list of yarn applications on which diagnosis is to be 1569 // performed. 1570 repeated string yarn_application_ids = 11 1571 [(google.api.field_behavior) = OPTIONAL]; 1572} 1573 1574// The location of diagnostic output. 1575message DiagnoseClusterResults { 1576 // Output only. The Cloud Storage URI of the diagnostic output. 
1577 // The output report is a plain text file with a summary of collected 1578 // diagnostics. 1579 string output_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; 1580} 1581 1582// Reservation Affinity for consuming Zonal reservation. 1583message ReservationAffinity { 1584 // Indicates whether to consume capacity from an reservation or not. 1585 enum Type { 1586 TYPE_UNSPECIFIED = 0; 1587 1588 // Do not consume from any allocated capacity. 1589 NO_RESERVATION = 1; 1590 1591 // Consume any reservation available. 1592 ANY_RESERVATION = 2; 1593 1594 // Must consume from a specific reservation. Must specify key value fields 1595 // for specifying the reservations. 1596 SPECIFIC_RESERVATION = 3; 1597 } 1598 1599 // Optional. Type of reservation to consume 1600 Type consume_reservation_type = 1 [(google.api.field_behavior) = OPTIONAL]; 1601 1602 // Optional. Corresponds to the label key of reservation resource. 1603 string key = 2 [(google.api.field_behavior) = OPTIONAL]; 1604 1605 // Optional. Corresponds to the label values of reservation resource. 1606 repeated string values = 3 [(google.api.field_behavior) = OPTIONAL]; 1607} 1608