// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "ClustersProto";
option java_package = "com.google.cloud.dataproc.v1";

// The ClusterControllerService provides methods to manage clusters
// of Compute Engine instances.
service ClusterController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc CreateCluster(CreateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters"
      body: "cluster"
    };
    option (google.api.method_signature) = "project_id,region,cluster";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Updates a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // The cluster must be in a
  // [`RUNNING`][google.cloud.dataproc.v1.ClusterStatus.State] state or an error
  // is returned.
  rpc UpdateCluster(UpdateClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
      body: "cluster"
    };
    option (google.api.method_signature) =
        "project_id,region,cluster_name,cluster,update_mask";
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Stops a cluster in a project.
  rpc StopCluster(StopClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:stop"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Starts a cluster in a project.
  rpc StartCluster(StartClusterRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:start"
      body: "*"
    };
    option (google.longrunning.operation_info) = {
      response_type: "Cluster"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Deletes a cluster in a project. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  rpc DeleteCluster(DeleteClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "google.protobuf.Empty"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }

  // Gets the resource representation for a cluster in a project.
  rpc GetCluster(GetClusterRequest) returns (Cluster) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
  }

  // Lists all regions/{region}/clusters in a project alphabetically.
  rpc ListClusters(ListClustersRequest) returns (ListClustersResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/clusters"
    };
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Gets cluster diagnostic information. The returned
  // [Operation.metadata][google.longrunning.Operation.metadata] will be
  // [ClusterOperationMetadata](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#clusteroperationmetadata).
  // After the operation completes,
  // [Operation.response][google.longrunning.Operation.response]
  // contains
  // [DiagnoseClusterResults](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#diagnoseclusterresults).
  rpc DiagnoseCluster(DiagnoseClusterRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/clusters/{cluster_name}:diagnose"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,cluster_name";
    option (google.longrunning.operation_info) = {
      response_type: "DiagnoseClusterResults"
      metadata_type: "google.cloud.dataproc.v1.ClusterOperationMetadata"
    };
  }
}

// Describes the identifying information, config, and status of
// a Dataproc cluster.
message Cluster {
  // Required. The Google Cloud Platform project ID that the cluster belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The cluster name, which must be unique within a project.
  // The name must start with a lowercase letter, and can contain
  // up to 51 lowercase letters, numbers, and hyphens. It cannot end
  // with a hyphen. The name of a deleted cluster can be reused.
  string cluster_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The cluster config for a cluster of Compute Engine Instances.
  // Note that Dataproc may set default values, and values may change
  // when clusters are updated.
  //
  // Exactly one of ClusterConfig or VirtualClusterConfig must be specified.
  ClusterConfig config = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The virtual cluster config is used when creating a Dataproc
  // cluster that does not directly control the underlying compute resources,
  // for example, when creating a [Dataproc-on-GKE
  // cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
  // Dataproc may set default values, and values may change when
  // clusters are updated. Exactly one of
  // [config][google.cloud.dataproc.v1.Cluster.config] or
  // [virtual_cluster_config][google.cloud.dataproc.v1.Cluster.virtual_cluster_config]
  // must be specified.
  VirtualClusterConfig virtual_cluster_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The labels to associate with this cluster.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a cluster.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Cluster status.
  ClusterStatus status = 4 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous cluster status.
  repeated ClusterStatus status_history = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A cluster UUID (Unique Universal Identifier). Dataproc
  // generates this value when it creates the cluster.
  string cluster_uuid = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Contains cluster daemon metrics such as HDFS and YARN stats.
  //
  // **Beta Feature**: This report is available for testing purposes only. It
  // may be changed before final release.
  ClusterMetrics metrics = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// The cluster config.
message ClusterConfig {
  // Optional. A Cloud Storage bucket used to stage job
  // dependencies, config files, and job driver console output.
  // If you do not specify a staging bucket, Cloud
  // Dataproc will determine a Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the
  // Compute Engine zone where your cluster is deployed, and then create
  // and manage this project-level, per-location bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string config_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A Cloud Storage bucket used to store ephemeral cluster and jobs
  // data, such as Spark and MapReduce history files. If you do not specify a
  // temp bucket, Dataproc will determine a Cloud Storage location (US, ASIA, or
  // EU) for your cluster's temp bucket according to the Compute Engine zone
  // where your cluster is deployed, and then create and manage this
  // project-level, per-location bucket. The default bucket has a TTL of 90
  // days, but you can use any TTL (or none) if you specify a bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string temp_bucket = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The shared Compute Engine config settings for
  // all instances in a cluster.
  GceClusterConfig gce_cluster_config = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // the cluster's master instance.
  InstanceGroupConfig master_config = 9
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // the cluster's worker instances.
  InstanceGroupConfig worker_config = 10
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine config settings for
  // a cluster's secondary worker instances.
  InstanceGroupConfig secondary_worker_config = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The config settings for cluster software.
  SoftwareConfig software_config = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Commands to execute on each node after config is
  // completed. By default, executables are run on master and all worker nodes.
  // You can test a node's `role` metadata to run an executable on
  // a master or worker node, as shown below using `curl` (you can also use
  // `wget`):
  //
  //     ROLE=$(curl -H Metadata-Flavor:Google
  //     http://metadata/computeMetadata/v1/instance/attributes/dataproc-role)
  //     if [[ "${ROLE}" == 'Master' ]]; then
  //       ... master specific actions ...
  //     else
  //       ... worker specific actions ...
  //     fi
  repeated NodeInitializationAction initialization_actions = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Encryption settings for the cluster.
  EncryptionConfig encryption_config = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Autoscaling config for the policy associated with the cluster.
  // Cluster does not autoscale if this field is unset.
  AutoscalingConfig autoscaling_config = 18
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Security settings for the cluster.
  SecurityConfig security_config = 16 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Lifecycle setting for the cluster.
  LifecycleConfig lifecycle_config = 17
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Port/endpoint configuration for this cluster.
  EndpointConfig endpoint_config = 19 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Metastore configuration.
  MetastoreConfig metastore_config = 20
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The config for Dataproc metrics.
  DataprocMetricConfig dataproc_metric_config = 23
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The node group settings.
  repeated AuxiliaryNodeGroup auxiliary_node_groups = 25
      [(google.api.field_behavior) = OPTIONAL];
}

// The Dataproc cluster config for a cluster that does not directly control the
// underlying compute resources, such as a [Dataproc-on-GKE
// cluster](https://cloud.google.com/dataproc/docs/guides/dpgke/dataproc-gke-overview).
message VirtualClusterConfig {
  // Optional. A Cloud Storage bucket used to stage job
  // dependencies, config files, and job driver console output.
  // If you do not specify a staging bucket, Cloud
  // Dataproc will determine a Cloud Storage location (US,
  // ASIA, or EU) for your cluster's staging bucket according to the
  // Compute Engine zone where your cluster is deployed, and then create
  // and manage this project-level, per-location bucket (see
  // [Dataproc staging and temp
  // buckets](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/staging-bucket)).
  // **This field requires a Cloud Storage bucket name, not a `gs://...` URI to
  // a Cloud Storage bucket.**
  string staging_bucket = 1 [(google.api.field_behavior) = OPTIONAL];

  oneof infrastructure_config {
    // Required. The configuration for running the Dataproc cluster on
    // Kubernetes.
    KubernetesClusterConfig kubernetes_cluster_config = 6
        [(google.api.field_behavior) = REQUIRED];
  }

  // Optional. Configuration of auxiliary services used by this cluster.
  AuxiliaryServicesConfig auxiliary_services_config = 7
      [(google.api.field_behavior) = OPTIONAL];
}

// Auxiliary services configuration for a Cluster.
message AuxiliaryServicesConfig {
  // Optional. The Hive Metastore configuration for this workload.
  MetastoreConfig metastore_config = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Spark History Server configuration for the workload.
  SparkHistoryServerConfig spark_history_server_config = 2
      [(google.api.field_behavior) = OPTIONAL];
}

// Endpoint config for this cluster.
message EndpointConfig {
  // Output only. The map of port descriptions to URLs. Will only be populated
  // if enable_http_port_access is true.
  map<string, string> http_ports = 1
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. If true, enable http access to specific ports on the cluster
  // from external sources. Defaults to false.
  bool enable_http_port_access = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Autoscaling Policy config associated with the cluster.
message AutoscalingConfig {
  // Optional. The autoscaling policy used by the cluster.
  //
  // Only resource names including projectid and location (region) are valid.
  // Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  // * `projects/[project_id]/locations/[dataproc_region]/autoscalingPolicies/[policy_id]`
  //
  // Note that the policy must be in the same project and Dataproc region.
  string policy_uri = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Encryption settings for the cluster.
message EncryptionConfig {
  // Optional. The Cloud KMS key name to use for PD disk encryption for all
  // instances in the cluster.
  string gce_pd_kms_key_name = 1 [(google.api.field_behavior) = OPTIONAL];
}

// Common config settings for resources of Compute Engine cluster
// instances, applicable to all instances in the cluster.
message GceClusterConfig {
  // `PrivateIpv6GoogleAccess` controls whether and how Dataproc cluster nodes
  // can communicate with Google Services through gRPC over IPv6.
  // These values are directly mapped to corresponding values in the
  // [Compute Engine Instance
  // fields](https://cloud.google.com/compute/docs/reference/rest/v1/instances).
  enum PrivateIpv6GoogleAccess {
    // If unspecified, Compute Engine default behavior will apply, which
    // is the same as
    // [INHERIT_FROM_SUBNETWORK][google.cloud.dataproc.v1.GceClusterConfig.PrivateIpv6GoogleAccess.INHERIT_FROM_SUBNETWORK].
    PRIVATE_IPV6_GOOGLE_ACCESS_UNSPECIFIED = 0;

    // Private access to and from Google Services configuration
    // inherited from the subnetwork configuration. This is the
    // default Compute Engine behavior.
    INHERIT_FROM_SUBNETWORK = 1;

    // Enables outbound private IPv6 access to Google Services from the Dataproc
    // cluster.
    OUTBOUND = 2;

    // Enables bidirectional private IPv6 access between Google Services and the
    // Dataproc cluster.
    BIDIRECTIONAL = 3;
  }

  // Optional. The Compute Engine zone where the Dataproc cluster will be
  // located. If omitted, the service will pick a zone in the cluster's Compute
  // Engine region. On a get request, zone will always be present.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]`
  // * `projects/[project_id]/zones/[zone]`
  // * `[zone]`
  string zone_uri = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine network to be used for machine
  // communications. Cannot be specified with subnetwork_uri. If neither
  // `network_uri` nor `subnetwork_uri` is specified, the "default" network of
  // the project is used, if it exists. Cannot be a "Custom Subnet Network" (see
  // [Using Subnetworks](https://cloud.google.com/compute/docs/subnetworks) for
  // more information).
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/networks/default`
  // * `projects/[project_id]/global/networks/default`
  // * `default`
  string network_uri = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine subnetwork to be used for machine
  // communications. Cannot be specified with network_uri.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `projects/[project_id]/regions/[region]/subnetworks/sub0`
  // * `sub0`
  string subnetwork_uri = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If true, all instances in the cluster will only have internal IP
  // addresses. By default, clusters are not restricted to internal IP
  // addresses, and will have ephemeral external IP addresses assigned to each
  // instance. This `internal_ip_only` restriction can only be enabled for
  // subnetwork enabled networks, and all off-cluster dependencies must be
  // configured to be accessible without external IP addresses.
  optional bool internal_ip_only = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The type of IPv6 access for a cluster.
  PrivateIpv6GoogleAccess private_ipv6_google_access = 12
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The [Dataproc service
  // account](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/service-accounts#service_accounts_in_dataproc)
  // (also see [VM Data Plane
  // identity](https://cloud.google.com/dataproc/docs/concepts/iam/dataproc-principals#vm_service_account_data_plane_identity))
  // used by Dataproc cluster VM instances to access Google Cloud Platform
  // services.
  //
  // If not specified, the
  // [Compute Engine default service
  // account](https://cloud.google.com/compute/docs/access/service-accounts#default_service_account)
  // is used.
  string service_account = 8 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The URIs of service account scopes to be included in
  // Compute Engine instances. The following base set of scopes is always
  // included:
  //
  // * https://www.googleapis.com/auth/cloud.useraccounts.readonly
  // * https://www.googleapis.com/auth/devstorage.read_write
  // * https://www.googleapis.com/auth/logging.write
  //
  // If no scopes are specified, the following defaults are also provided:
  //
  // * https://www.googleapis.com/auth/bigquery
  // * https://www.googleapis.com/auth/bigtable.admin.table
  // * https://www.googleapis.com/auth/bigtable.data
  // * https://www.googleapis.com/auth/devstorage.full_control
  repeated string service_account_scopes = 3
      [(google.api.field_behavior) = OPTIONAL];

  // The Compute Engine tags to add to all instances (see [Tagging
  // instances](https://cloud.google.com/compute/docs/label-or-tag-resources#tags)).
  repeated string tags = 4;

  // The Compute Engine metadata entries to add to all instances (see
  // [Project and instance
  // metadata](https://cloud.google.com/compute/docs/storing-retrieving-metadata#project_and_instance_metadata)).
  map<string, string> metadata = 5;

  // Optional. Reservation Affinity for consuming Zonal reservation.
  ReservationAffinity reservation_affinity = 11
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node Group Affinity for sole-tenant clusters.
  NodeGroupAffinity node_group_affinity = 13
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Shielded Instance Config for clusters using [Compute Engine
  // Shielded
  // VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
  ShieldedInstanceConfig shielded_instance_config = 14
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Confidential Instance Config for clusters using [Confidential
  // VMs](https://cloud.google.com/compute/confidential-vm/docs).
  ConfidentialInstanceConfig confidential_instance_config = 15
      [(google.api.field_behavior) = OPTIONAL];
}

// Node Group Affinity for clusters using sole-tenant node groups.
// **The Dataproc `NodeGroupAffinity` resource is not related to the
// Dataproc [NodeGroup][google.cloud.dataproc.v1.NodeGroup] resource.**
message NodeGroupAffinity {
  // Required. The URI of a
  // sole-tenant [node group
  // resource](https://cloud.google.com/compute/docs/reference/rest/v1/nodeGroups)
  // that the cluster will be created on.
  //
  // A full URL, partial URI, or node group name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `projects/[project_id]/zones/[zone]/nodeGroups/node-group-1`
  // * `node-group-1`
  string node_group_uri = 1 [(google.api.field_behavior) = REQUIRED];
}

// Shielded Instance Config for clusters using [Compute Engine Shielded
// VMs](https://cloud.google.com/security/shielded-cloud/shielded-vm).
message ShieldedInstanceConfig {
  // Optional. Defines whether instances have Secure Boot enabled.
  optional bool enable_secure_boot = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Defines whether instances have the vTPM enabled.
  optional bool enable_vtpm = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Defines whether instances have integrity monitoring enabled.
  optional bool enable_integrity_monitoring = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Confidential Instance Config for clusters using [Confidential
// VMs](https://cloud.google.com/compute/confidential-vm/docs)
message ConfidentialInstanceConfig {
  // Optional. Defines whether the instance should have confidential compute
  // enabled.
  bool enable_confidential_compute = 1 [(google.api.field_behavior) = OPTIONAL];
}

// The config settings for Compute Engine resources in
// an instance group, such as a master or worker group.
message InstanceGroupConfig {
  // Controls the use of preemptible instances within the group.
  enum Preemptibility {
    // Preemptibility is unspecified, the system will choose the
    // appropriate setting for each instance group.
    PREEMPTIBILITY_UNSPECIFIED = 0;

    // Instances are non-preemptible.
    //
    // This option is allowed for all instance groups and is the only valid
    // value for Master and Worker instance groups.
    NON_PREEMPTIBLE = 1;

    // Instances are [preemptible]
    // (https://cloud.google.com/compute/docs/instances/preemptible).
    //
    // This option is allowed only for [secondary worker]
    // (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
    // groups.
    PREEMPTIBLE = 2;

    // Instances are [Spot VMs]
    // (https://cloud.google.com/compute/docs/instances/spot).
    //
    // This option is allowed only for [secondary worker]
    // (https://cloud.google.com/dataproc/docs/concepts/compute/secondary-vms)
    // groups. Spot VMs are the latest version of [preemptible VMs]
    // (https://cloud.google.com/compute/docs/instances/preemptible), and
    // provide additional features.
    SPOT = 3;
  }

  // Optional. The number of VM instances in the instance group.
  // For [HA
  // cluster](/dataproc/docs/concepts/configuring-clusters/high-availability)
  // [master_config](#FIELDS.master_config) groups, **must be set to 3**.
  // For standard cluster [master_config](#FIELDS.master_config) groups,
  // **must be set to 1**.
  int32 num_instances = 1 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The list of instance names. Dataproc derives the names
  // from `cluster_name`, `num_instances`, and the instance group.
  repeated string instance_names = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The Compute Engine image resource used for cluster instances.
  //
  // The URI can represent an image or image family.
  //
  // Image examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/[image-id]`
  // * `projects/[project_id]/global/images/[image-id]`
  // * `image-id`
  //
  // Image family examples. Dataproc will use the most recent
  // image from the family:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/global/images/family/[custom-image-family-name]`
  // * `projects/[project_id]/global/images/family/[custom-image-family-name]`
  //
  // If the URI is unspecified, it will be inferred from
  // `SoftwareConfig.image_version` or the system default.
  string image_uri = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The Compute Engine machine type used for cluster instances.
  //
  // A full URL, partial URI, or short name are valid. Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
  // * `projects/[project_id]/zones/[zone]/machineTypes/n1-standard-2`
  // * `n1-standard-2`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the machine type
  // resource, for example, `n1-standard-2`.
  string machine_type_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Disk option config settings.
  DiskConfig disk_config = 5 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Specifies that this instance group contains preemptible
  // instances.
  bool is_preemptible = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Specifies the preemptibility of the instance group.
  //
  // The default value for master and worker groups is
  // `NON_PREEMPTIBLE`. This default cannot be changed.
  //
  // The default value for secondary instances is
  // `PREEMPTIBLE`.
  Preemptibility preemptibility = 10 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The config for Compute Engine Instance Group
  // Manager that manages this group.
  // This is only used for preemptible instance groups.
  ManagedGroupConfig managed_group_config = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The Compute Engine accelerator configuration for these
  // instances.
  repeated AcceleratorConfig accelerators = 8
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies the minimum cpu platform for the Instance Group.
  // See [Dataproc -> Minimum CPU
  // Platform](https://cloud.google.com/dataproc/docs/concepts/compute/dataproc-min-cpu).
  string min_cpu_platform = 9 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies the resources used to actively manage an instance group.
message ManagedGroupConfig {
  // Output only. The name of the Instance Template used for the Managed
  // Instance Group.
  string instance_template_name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The name of the Instance Group Manager for this group.
  string instance_group_manager_name = 2
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Specifies the type and number of accelerator cards attached to the instances
// of an instance. See [GPUs on Compute
// Engine](https://cloud.google.com/compute/docs/gpus/).
message AcceleratorConfig {
  // Full URL, partial URI, or short name of the accelerator type resource to
  // expose to this instance. See
  // [Compute Engine
  // AcceleratorTypes](https://cloud.google.com/compute/docs/reference/v1/acceleratorTypes).
  //
  // Examples:
  //
  // * `https://www.googleapis.com/compute/v1/projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
  // * `projects/[project_id]/zones/[zone]/acceleratorTypes/nvidia-tesla-k80`
  // * `nvidia-tesla-k80`
  //
  // **Auto Zone Exception**: If you are using the Dataproc
  // [Auto Zone
  // Placement](https://cloud.google.com/dataproc/docs/concepts/configuring-clusters/auto-zone#using_auto_zone_placement)
  // feature, you must use the short name of the accelerator type
  // resource, for example, `nvidia-tesla-k80`.
  string accelerator_type_uri = 1;

  // The number of the accelerator cards of this type exposed to this instance.
  int32 accelerator_count = 2;
}

// Specifies the config of disk options for a group of VM instances.
message DiskConfig {
  // Optional. Type of the boot disk (default is "pd-standard").
  // Valid values: "pd-balanced" (Persistent Disk Balanced Solid State Drive),
  // "pd-ssd" (Persistent Disk Solid State Drive),
  // or "pd-standard" (Persistent Disk Hard Disk Drive).
  // See [Disk types](https://cloud.google.com/compute/docs/disks#disk-types).
  string boot_disk_type = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Size in GB of the boot disk (default is 500GB).
  int32 boot_disk_size_gb = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Number of attached SSDs, from 0 to 8 (default is 0).
  // If SSDs are not attached, the boot disk is used to store runtime logs and
  // [HDFS](https://hadoop.apache.org/docs/r1.2.1/hdfs_user_guide.html) data.
  // If one or more SSDs are attached, this runtime bulk
  // data is spread across them, and the boot disk contains only basic
  // config and installed binaries.
  //
  // Note: Local SSD options may vary by machine type and number of vCPUs
  // selected.
  int32 num_local_ssds = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Interface type of local SSDs (default is "scsi").
  // Valid values: "scsi" (Small Computer System Interface),
  // "nvme" (Non-Volatile Memory Express).
  // See [local SSD
  // performance](https://cloud.google.com/compute/docs/disks/local-ssd#performance).
  string local_ssd_interface = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Node group identification and configuration information.
message AuxiliaryNodeGroup {
  // Required. Node group configuration.
  NodeGroup node_group = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. A node group ID. Generated if not specified.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). Cannot begin or end with underscore
  // or hyphen. Must consist of from 3 to 33 characters.
  string node_group_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc Node Group.
// **The Dataproc `NodeGroup` resource is not related to the
// Dataproc [NodeGroupAffinity][google.cloud.dataproc.v1.NodeGroupAffinity]
// resource.**
message NodeGroup {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/NodeGroup"
    pattern: "projects/{project}/regions/{region}/clusters/{cluster}/nodeGroups/{node_group}"
  };

  // Node group roles.
  enum Role {
    // Required unspecified role.
    ROLE_UNSPECIFIED = 0;

    // Job drivers run on the node group.
    DRIVER = 1;
  }

  // The Node group [resource name](https://aip.dev/122).
  string name = 1;

  // Required. Node group roles.
  repeated Role roles = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The node group instance group configuration.
  InstanceGroupConfig node_group_config = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. Node group labels.
  //
  // * Label **keys** must consist of from 1 to 63 characters and conform to
  //   [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // * Label **values** can be empty. If specified, they must consist of from
  //   1 to 63 characters and conform to [RFC 1035]
  //   (https://www.ietf.org/rfc/rfc1035.txt).
  // * The node group must have no more than 32 labels.
  map<string, string> labels = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Specifies an executable to run on a fully configured node and a
// timeout period for executable completion.
message NodeInitializationAction {
  // Required. Cloud Storage URI of executable file.
  string executable_file = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Amount of time executable has to complete. Default is
  // 10 minutes (see JSON representation of
  // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)).
801 // 802 // Cluster creation fails with an explanatory error message (the 803 // name of the executable that caused the error and the exceeded timeout 804 // period) if the executable is not completed at end of the timeout period. 805 google.protobuf.Duration execution_timeout = 2 806 [(google.api.field_behavior) = OPTIONAL]; 807} 808 809// The status of a cluster and its instances. 810message ClusterStatus { 811 // The cluster state. 812 enum State { 813 // The cluster state is unknown. 814 UNKNOWN = 0; 815 816 // The cluster is being created and set up. It is not ready for use. 817 CREATING = 1; 818 819 // The cluster is currently running and healthy. It is ready for use. 820 // 821 // **Note:** The cluster state changes from "creating" to "running" status 822 // after the master node(s), first two primary worker nodes (and the last 823 // primary worker node if primary workers > 2) are running. 824 RUNNING = 2; 825 826 // The cluster encountered an error. It is not ready for use. 827 ERROR = 3; 828 829 // The cluster has encountered an error while being updated. Jobs can 830 // be submitted to the cluster, but the cluster cannot be updated. 831 ERROR_DUE_TO_UPDATE = 9; 832 833 // The cluster is being deleted. It cannot be used. 834 DELETING = 4; 835 836 // The cluster is being updated. It continues to accept and process jobs. 837 UPDATING = 5; 838 839 // The cluster is being stopped. It cannot be used. 840 STOPPING = 6; 841 842 // The cluster is currently stopped. It is not ready for use. 843 STOPPED = 7; 844 845 // The cluster is being started. It is not ready for use. 846 STARTING = 8; 847 } 848 849 // The cluster substate. 850 enum Substate { 851 // The cluster substate is unknown. 852 UNSPECIFIED = 0; 853 854 // The cluster is known to be in an unhealthy state 855 // (for example, critical daemons are not running or HDFS capacity is 856 // exhausted). 857 // 858 // Applies to RUNNING state. 
859 UNHEALTHY = 1; 860 861 // The agent-reported status is out of date (may occur if 862 // Dataproc loses communication with Agent). 863 // 864 // Applies to RUNNING state. 865 STALE_STATUS = 2; 866 } 867 868 // Output only. The cluster's state. 869 State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; 870 871 // Optional. Output only. Details of cluster's state. 872 string detail = 2 [ 873 (google.api.field_behavior) = OUTPUT_ONLY, 874 (google.api.field_behavior) = OPTIONAL 875 ]; 876 877 // Output only. Time when this state was entered (see JSON representation of 878 // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)). 879 google.protobuf.Timestamp state_start_time = 3 880 [(google.api.field_behavior) = OUTPUT_ONLY]; 881 882 // Output only. Additional state information that includes 883 // status reported by the agent. 884 Substate substate = 4 [(google.api.field_behavior) = OUTPUT_ONLY]; 885} 886 887// Security related configuration, including encryption, Kerberos, etc. 888message SecurityConfig { 889 // Optional. Kerberos related configuration. 890 KerberosConfig kerberos_config = 1 [(google.api.field_behavior) = OPTIONAL]; 891 892 // Optional. Identity related configuration, including service account based 893 // secure multi-tenancy user mappings. 894 IdentityConfig identity_config = 2 [(google.api.field_behavior) = OPTIONAL]; 895} 896 897// Specifies Kerberos related configuration. 898message KerberosConfig { 899 // Optional. Flag to indicate whether to Kerberize the cluster (default: 900 // false). Set this field to true to enable Kerberos on a cluster. 901 bool enable_kerberos = 1 [(google.api.field_behavior) = OPTIONAL]; 902 903 // Optional. The Cloud Storage URI of a KMS encrypted file containing the root 904 // principal password. 905 string root_principal_password_uri = 2 906 [(google.api.field_behavior) = OPTIONAL]; 907 908 // Optional. The uri of the KMS key used to encrypt various sensitive 909 // files. 
910 string kms_key_uri = 3 [(google.api.field_behavior) = OPTIONAL]; 911 912 // Optional. The Cloud Storage URI of the keystore file used for SSL 913 // encryption. If not provided, Dataproc will provide a self-signed 914 // certificate. 915 string keystore_uri = 4 [(google.api.field_behavior) = OPTIONAL]; 916 917 // Optional. The Cloud Storage URI of the truststore file used for SSL 918 // encryption. If not provided, Dataproc will provide a self-signed 919 // certificate. 920 string truststore_uri = 5 [(google.api.field_behavior) = OPTIONAL]; 921 922 // Optional. The Cloud Storage URI of a KMS encrypted file containing the 923 // password to the user provided keystore. For the self-signed certificate, 924 // this password is generated by Dataproc. 925 string keystore_password_uri = 6 [(google.api.field_behavior) = OPTIONAL]; 926 927 // Optional. The Cloud Storage URI of a KMS encrypted file containing the 928 // password to the user provided key. For the self-signed certificate, this 929 // password is generated by Dataproc. 930 string key_password_uri = 7 [(google.api.field_behavior) = OPTIONAL]; 931 932 // Optional. The Cloud Storage URI of a KMS encrypted file containing the 933 // password to the user provided truststore. For the self-signed certificate, 934 // this password is generated by Dataproc. 935 string truststore_password_uri = 8 [(google.api.field_behavior) = OPTIONAL]; 936 937 // Optional. The remote realm the Dataproc on-cluster KDC will trust, should 938 // the user enable cross realm trust. 939 string cross_realm_trust_realm = 9 [(google.api.field_behavior) = OPTIONAL]; 940 941 // Optional. The KDC (IP or hostname) for the remote trusted realm in a cross 942 // realm trust relationship. 943 string cross_realm_trust_kdc = 10 [(google.api.field_behavior) = OPTIONAL]; 944 945 // Optional. The admin server (IP or hostname) for the remote trusted realm in 946 // a cross realm trust relationship. 
947 string cross_realm_trust_admin_server = 11 948 [(google.api.field_behavior) = OPTIONAL]; 949 950 // Optional. The Cloud Storage URI of a KMS encrypted file containing the 951 // shared password between the on-cluster Kerberos realm and the remote 952 // trusted realm, in a cross realm trust relationship. 953 string cross_realm_trust_shared_password_uri = 12 954 [(google.api.field_behavior) = OPTIONAL]; 955 956 // Optional. The Cloud Storage URI of a KMS encrypted file containing the 957 // master key of the KDC database. 958 string kdc_db_key_uri = 13 [(google.api.field_behavior) = OPTIONAL]; 959 960 // Optional. The lifetime of the ticket granting ticket, in hours. 961 // If not specified, or user specifies 0, then default value 10 962 // will be used. 963 int32 tgt_lifetime_hours = 14 [(google.api.field_behavior) = OPTIONAL]; 964 965 // Optional. The name of the on-cluster Kerberos realm. 966 // If not specified, the uppercased domain of hostnames will be the realm. 967 string realm = 15 [(google.api.field_behavior) = OPTIONAL]; 968} 969 970// Identity related configuration, including service account based 971// secure multi-tenancy user mappings. 972message IdentityConfig { 973 // Required. Map of user to service account. 974 map<string, string> user_service_account_mapping = 1 975 [(google.api.field_behavior) = REQUIRED]; 976} 977 978// Specifies the selection and config of software inside the cluster. 979message SoftwareConfig { 980 // Optional. The version of software inside the cluster. It must be one of the 981 // supported [Dataproc 982 // Versions](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#supported_dataproc_versions), 983 // such as "1.2" (including a subminor version, such as "1.2.29"), or the 984 // ["preview" 985 // version](https://cloud.google.com/dataproc/docs/concepts/versioning/dataproc-versions#other_versions). 986 // If unspecified, it defaults to the latest Debian version. 
987 string image_version = 1 [(google.api.field_behavior) = OPTIONAL]; 988 989 // Optional. The properties to set on daemon config files. 990 // 991 // Property keys are specified in `prefix:property` format, for example 992 // `core:hadoop.tmp.dir`. The following are supported prefixes 993 // and their mappings: 994 // 995 // * capacity-scheduler: `capacity-scheduler.xml` 996 // * core: `core-site.xml` 997 // * distcp: `distcp-default.xml` 998 // * hdfs: `hdfs-site.xml` 999 // * hive: `hive-site.xml` 1000 // * mapred: `mapred-site.xml` 1001 // * pig: `pig.properties` 1002 // * spark: `spark-defaults.conf` 1003 // * yarn: `yarn-site.xml` 1004 // 1005 // For more information, see [Cluster 1006 // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties). 1007 map<string, string> properties = 2 [(google.api.field_behavior) = OPTIONAL]; 1008 1009 // Optional. The set of components to activate on the cluster. 1010 repeated Component optional_components = 3 1011 [(google.api.field_behavior) = OPTIONAL]; 1012} 1013 1014// Specifies the cluster auto-delete schedule configuration. 1015message LifecycleConfig { 1016 // Optional. The duration to keep the cluster alive while idling (when no jobs 1017 // are running). Passing this threshold will cause the cluster to be 1018 // deleted. Minimum value is 5 minutes; maximum value is 14 days (see JSON 1019 // representation of 1020 // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)). 1021 google.protobuf.Duration idle_delete_ttl = 1 1022 [(google.api.field_behavior) = OPTIONAL]; 1023 1024 // Either the exact time the cluster should be deleted at or 1025 // the cluster maximum age. 1026 oneof ttl { 1027 // Optional. The time when cluster will be auto-deleted (see JSON 1028 // representation of 1029 // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)). 
1030 google.protobuf.Timestamp auto_delete_time = 2 1031 [(google.api.field_behavior) = OPTIONAL]; 1032 1033 // Optional. The lifetime duration of cluster. The cluster will be 1034 // auto-deleted at the end of this period. Minimum value is 10 minutes; 1035 // maximum value is 14 days (see JSON representation of 1036 // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)). 1037 google.protobuf.Duration auto_delete_ttl = 3 1038 [(google.api.field_behavior) = OPTIONAL]; 1039 } 1040 1041 // Output only. The time when cluster became idle (most recent job finished) 1042 // and became eligible for deletion due to idleness (see JSON representation 1043 // of 1044 // [Timestamp](https://developers.google.com/protocol-buffers/docs/proto3#json)). 1045 google.protobuf.Timestamp idle_start_time = 4 1046 [(google.api.field_behavior) = OUTPUT_ONLY]; 1047} 1048 1049// Specifies a Metastore configuration. 1050message MetastoreConfig { 1051 // Required. Resource name of an existing Dataproc Metastore service. 1052 // 1053 // Example: 1054 // 1055 // * `projects/[project_id]/locations/[dataproc_region]/services/[service-name]` 1056 string dataproc_metastore_service = 1 [ 1057 (google.api.field_behavior) = REQUIRED, 1058 (google.api.resource_reference) = { 1059 type: "metastore.googleapis.com/Service" 1060 } 1061 ]; 1062} 1063 1064// Contains cluster daemon metrics, such as HDFS and YARN stats. 1065// 1066// **Beta Feature**: This report is available for testing purposes only. It may 1067// be changed before final release. 1068message ClusterMetrics { 1069 // The HDFS metrics. 1070 map<string, int64> hdfs_metrics = 1; 1071 1072 // YARN metrics. 1073 map<string, int64> yarn_metrics = 2; 1074} 1075 1076// Dataproc metric config. 1077message DataprocMetricConfig { 1078 // A source for the collection of Dataproc OSS metrics (see [available OSS 1079 // metrics] 1080 // (https://cloud.google.com//dataproc/docs/guides/monitoring#available_oss_metrics)). 
1081 enum MetricSource { 1082 // Required unspecified metric source. 1083 METRIC_SOURCE_UNSPECIFIED = 0; 1084 1085 // Default monitoring agent metrics. If this source is enabled, 1086 // Dataproc enables the monitoring agent in Compute Engine, 1087 // and collects default monitoring agent metrics, which are published 1088 // with an `agent.googleapis.com` prefix. 1089 MONITORING_AGENT_DEFAULTS = 1; 1090 1091 // HDFS metric source. 1092 HDFS = 2; 1093 1094 // Spark metric source. 1095 SPARK = 3; 1096 1097 // YARN metric source. 1098 YARN = 4; 1099 1100 // Spark History Server metric source. 1101 SPARK_HISTORY_SERVER = 5; 1102 1103 // Hiveserver2 metric source. 1104 HIVESERVER2 = 6; 1105 1106 // hivemetastore metric source 1107 HIVEMETASTORE = 7; 1108 } 1109 1110 // A Dataproc OSS metric. 1111 message Metric { 1112 // Required. Default metrics are collected unless `metricOverrides` are 1113 // specified for the metric source (see [Available OSS metrics] 1114 // (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) 1115 // for more information). 1116 MetricSource metric_source = 1 [(google.api.field_behavior) = REQUIRED]; 1117 1118 // Optional. Specify one or more [available OSS metrics] 1119 // (https://cloud.google.com/dataproc/docs/guides/monitoring#available_oss_metrics) 1120 // to collect for the metric course (for the `SPARK` metric source, any 1121 // [Spark metric] 1122 // (https://spark.apache.org/docs/latest/monitoring.html#metrics) can be 1123 // specified). 1124 // 1125 // Provide metrics in the following format: 1126 // <code><var>METRIC_SOURCE</var>:<var>INSTANCE</var>:<var>GROUP</var>:<var>METRIC</var></code> 1127 // Use camelcase as appropriate. 
1128 // 1129 // Examples: 1130 // 1131 // ``` 1132 // yarn:ResourceManager:QueueMetrics:AppsCompleted 1133 // spark:driver:DAGScheduler:job.allJobs 1134 // sparkHistoryServer:JVM:Memory:NonHeapMemoryUsage.committed 1135 // hiveserver2:JVM:Memory:NonHeapMemoryUsage.used 1136 // ``` 1137 // 1138 // Notes: 1139 // 1140 // * Only the specified overridden metrics will be collected for the 1141 // metric source. For example, if one or more `spark:executive` metrics 1142 // are listed as metric overrides, other `SPARK` metrics will not be 1143 // collected. The collection of the default metrics for other OSS metric 1144 // sources is unaffected. For example, if both `SPARK` andd `YARN` metric 1145 // sources are enabled, and overrides are provided for Spark metrics only, 1146 // all default YARN metrics will be collected. 1147 repeated string metric_overrides = 2 1148 [(google.api.field_behavior) = OPTIONAL]; 1149 } 1150 1151 // Required. Metrics sources to enable. 1152 repeated Metric metrics = 1 [(google.api.field_behavior) = REQUIRED]; 1153} 1154 1155// A request to create a cluster. 1156message CreateClusterRequest { 1157 // Required. The ID of the Google Cloud Platform project that the cluster 1158 // belongs to. 1159 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1160 1161 // Required. The Dataproc region in which to handle the request. 1162 string region = 3 [(google.api.field_behavior) = REQUIRED]; 1163 1164 // Required. The cluster to create. 1165 Cluster cluster = 2 [(google.api.field_behavior) = REQUIRED]; 1166 1167 // Optional. A unique ID used to identify the request. 
If the server receives 1168 // two 1169 // [CreateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateClusterRequest)s 1170 // with the same id, then the second request will be ignored and the 1171 // first [google.longrunning.Operation][google.longrunning.Operation] created 1172 // and stored in the backend is returned. 1173 // 1174 // It is recommended to always set this value to a 1175 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). 1176 // 1177 // The ID must contain only letters (a-z, A-Z), numbers (0-9), 1178 // underscores (_), and hyphens (-). The maximum length is 40 characters. 1179 string request_id = 4 [(google.api.field_behavior) = OPTIONAL]; 1180 1181 // Optional. Failure action when primary worker creation fails. 1182 FailureAction action_on_failed_primary_workers = 5 1183 [(google.api.field_behavior) = OPTIONAL]; 1184} 1185 1186// A request to update a cluster. 1187message UpdateClusterRequest { 1188 // Required. The ID of the Google Cloud Platform project the 1189 // cluster belongs to. 1190 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1191 1192 // Required. The Dataproc region in which to handle the request. 1193 string region = 5 [(google.api.field_behavior) = REQUIRED]; 1194 1195 // Required. The cluster name. 1196 string cluster_name = 2 [(google.api.field_behavior) = REQUIRED]; 1197 1198 // Required. The changes to the cluster. 1199 Cluster cluster = 3 [(google.api.field_behavior) = REQUIRED]; 1200 1201 // Optional. Timeout for graceful YARN decommissioning. Graceful 1202 // decommissioning allows removing nodes from the cluster without 1203 // interrupting jobs in progress. Timeout specifies how long to wait for jobs 1204 // in progress to finish before forcefully removing nodes (and potentially 1205 // interrupting jobs). Default timeout is 0 (for forceful decommission), and 1206 // the maximum allowed timeout is 1 day. 
(see JSON representation of 1207 // [Duration](https://developers.google.com/protocol-buffers/docs/proto3#json)). 1208 // 1209 // Only supported on Dataproc image versions 1.2 and higher. 1210 google.protobuf.Duration graceful_decommission_timeout = 6 1211 [(google.api.field_behavior) = OPTIONAL]; 1212 1213 // Required. Specifies the path, relative to `Cluster`, of 1214 // the field to update. For example, to change the number of workers 1215 // in a cluster to 5, the `update_mask` parameter would be 1216 // specified as `config.worker_config.num_instances`, 1217 // and the `PATCH` request body would specify the new value, as follows: 1218 // 1219 // { 1220 // "config":{ 1221 // "workerConfig":{ 1222 // "numInstances":"5" 1223 // } 1224 // } 1225 // } 1226 // Similarly, to change the number of preemptible workers in a cluster to 5, 1227 // the `update_mask` parameter would be 1228 // `config.secondary_worker_config.num_instances`, and the `PATCH` request 1229 // body would be set as follows: 1230 // 1231 // { 1232 // "config":{ 1233 // "secondaryWorkerConfig":{ 1234 // "numInstances":"5" 1235 // } 1236 // } 1237 // } 1238 // <strong>Note:</strong> Currently, only the following fields can be updated: 1239 // 1240 // <table> 1241 // <tbody> 1242 // <tr> 1243 // <td><strong>Mask</strong></td> 1244 // <td><strong>Purpose</strong></td> 1245 // </tr> 1246 // <tr> 1247 // <td><strong><em>labels</em></strong></td> 1248 // <td>Update labels</td> 1249 // </tr> 1250 // <tr> 1251 // <td><strong><em>config.worker_config.num_instances</em></strong></td> 1252 // <td>Resize primary worker group</td> 1253 // </tr> 1254 // <tr> 1255 // <td><strong><em>config.secondary_worker_config.num_instances</em></strong></td> 1256 // <td>Resize secondary worker group</td> 1257 // </tr> 1258 // <tr> 1259 // <td>config.autoscaling_config.policy_uri</td><td>Use, stop using, or 1260 // change autoscaling policies</td> 1261 // </tr> 1262 // </tbody> 1263 // </table> 1264 google.protobuf.FieldMask 
update_mask = 4 1265 [(google.api.field_behavior) = REQUIRED]; 1266 1267 // Optional. A unique ID used to identify the request. If the server 1268 // receives two 1269 // [UpdateClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.UpdateClusterRequest)s 1270 // with the same id, then the second request will be ignored and the 1271 // first [google.longrunning.Operation][google.longrunning.Operation] created 1272 // and stored in the backend is returned. 1273 // 1274 // It is recommended to always set this value to a 1275 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). 1276 // 1277 // The ID must contain only letters (a-z, A-Z), numbers (0-9), 1278 // underscores (_), and hyphens (-). The maximum length is 40 characters. 1279 string request_id = 7 [(google.api.field_behavior) = OPTIONAL]; 1280} 1281 1282// A request to stop a cluster. 1283message StopClusterRequest { 1284 // Required. The ID of the Google Cloud Platform project the 1285 // cluster belongs to. 1286 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1287 1288 // Required. The Dataproc region in which to handle the request. 1289 string region = 2 [(google.api.field_behavior) = REQUIRED]; 1290 1291 // Required. The cluster name. 1292 string cluster_name = 3 [(google.api.field_behavior) = REQUIRED]; 1293 1294 // Optional. Specifying the `cluster_uuid` means the RPC will fail 1295 // (with error NOT_FOUND) if a cluster with the specified UUID does not exist. 1296 string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL]; 1297 1298 // Optional. A unique ID used to identify the request. 
If the server 1299 // receives two 1300 // [StopClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StopClusterRequest)s 1301 // with the same id, then the second request will be ignored and the 1302 // first [google.longrunning.Operation][google.longrunning.Operation] created 1303 // and stored in the backend is returned. 1304 // 1305 // Recommendation: Set this value to a 1306 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). 1307 // 1308 // The ID must contain only letters (a-z, A-Z), numbers (0-9), 1309 // underscores (_), and hyphens (-). The maximum length is 40 characters. 1310 string request_id = 5 [(google.api.field_behavior) = OPTIONAL]; 1311} 1312 1313// A request to start a cluster. 1314message StartClusterRequest { 1315 // Required. The ID of the Google Cloud Platform project the 1316 // cluster belongs to. 1317 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1318 1319 // Required. The Dataproc region in which to handle the request. 1320 string region = 2 [(google.api.field_behavior) = REQUIRED]; 1321 1322 // Required. The cluster name. 1323 string cluster_name = 3 [(google.api.field_behavior) = REQUIRED]; 1324 1325 // Optional. Specifying the `cluster_uuid` means the RPC will fail 1326 // (with error NOT_FOUND) if a cluster with the specified UUID does not exist. 1327 string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL]; 1328 1329 // Optional. A unique ID used to identify the request. If the server 1330 // receives two 1331 // [StartClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.StartClusterRequest)s 1332 // with the same id, then the second request will be ignored and the 1333 // first [google.longrunning.Operation][google.longrunning.Operation] created 1334 // and stored in the backend is returned. 
1335 // 1336 // Recommendation: Set this value to a 1337 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). 1338 // 1339 // The ID must contain only letters (a-z, A-Z), numbers (0-9), 1340 // underscores (_), and hyphens (-). The maximum length is 40 characters. 1341 string request_id = 5 [(google.api.field_behavior) = OPTIONAL]; 1342} 1343 1344// A request to delete a cluster. 1345message DeleteClusterRequest { 1346 // Required. The ID of the Google Cloud Platform project that the cluster 1347 // belongs to. 1348 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1349 1350 // Required. The Dataproc region in which to handle the request. 1351 string region = 3 [(google.api.field_behavior) = REQUIRED]; 1352 1353 // Required. The cluster name. 1354 string cluster_name = 2 [(google.api.field_behavior) = REQUIRED]; 1355 1356 // Optional. Specifying the `cluster_uuid` means the RPC should fail 1357 // (with error NOT_FOUND) if cluster with specified UUID does not exist. 1358 string cluster_uuid = 4 [(google.api.field_behavior) = OPTIONAL]; 1359 1360 // Optional. A unique ID used to identify the request. If the server 1361 // receives two 1362 // [DeleteClusterRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.DeleteClusterRequest)s 1363 // with the same id, then the second request will be ignored and the 1364 // first [google.longrunning.Operation][google.longrunning.Operation] created 1365 // and stored in the backend is returned. 1366 // 1367 // It is recommended to always set this value to a 1368 // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier). 1369 // 1370 // The ID must contain only letters (a-z, A-Z), numbers (0-9), 1371 // underscores (_), and hyphens (-). The maximum length is 40 characters. 1372 string request_id = 5 [(google.api.field_behavior) = OPTIONAL]; 1373} 1374 1375// Request to get the resource representation for a cluster in a project. 
1376message GetClusterRequest { 1377 // Required. The ID of the Google Cloud Platform project that the cluster 1378 // belongs to. 1379 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1380 1381 // Required. The Dataproc region in which to handle the request. 1382 string region = 3 [(google.api.field_behavior) = REQUIRED]; 1383 1384 // Required. The cluster name. 1385 string cluster_name = 2 [(google.api.field_behavior) = REQUIRED]; 1386} 1387 1388// A request to list the clusters in a project. 1389message ListClustersRequest { 1390 // Required. The ID of the Google Cloud Platform project that the cluster 1391 // belongs to. 1392 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1393 1394 // Required. The Dataproc region in which to handle the request. 1395 string region = 4 [(google.api.field_behavior) = REQUIRED]; 1396 1397 // Optional. A filter constraining the clusters to list. Filters are 1398 // case-sensitive and have the following syntax: 1399 // 1400 // field = value [AND [field = value]] ... 1401 // 1402 // where **field** is one of `status.state`, `clusterName`, or `labels.[KEY]`, 1403 // and `[KEY]` is a label key. **value** can be `*` to match all values. 1404 // `status.state` can be one of the following: `ACTIVE`, `INACTIVE`, 1405 // `CREATING`, `RUNNING`, `ERROR`, `DELETING`, or `UPDATING`. `ACTIVE` 1406 // contains the `CREATING`, `UPDATING`, and `RUNNING` states. `INACTIVE` 1407 // contains the `DELETING` and `ERROR` states. 1408 // `clusterName` is the name of the cluster provided at creation time. 1409 // Only the logical `AND` operator is supported; space-separated items are 1410 // treated as having an implicit `AND` operator. 1411 // 1412 // Example filter: 1413 // 1414 // status.state = ACTIVE AND clusterName = mycluster 1415 // AND labels.env = staging AND labels.starred = * 1416 string filter = 5 [(google.api.field_behavior) = OPTIONAL]; 1417 1418 // Optional. The standard List page size. 
1419 int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL]; 1420 1421 // Optional. The standard List page token. 1422 string page_token = 3 [(google.api.field_behavior) = OPTIONAL]; 1423} 1424 1425// The list of all clusters in a project. 1426message ListClustersResponse { 1427 // Output only. The clusters in the project. 1428 repeated Cluster clusters = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; 1429 1430 // Output only. This token is included in the response if there are more 1431 // results to fetch. To fetch additional results, provide this value as the 1432 // `page_token` in a subsequent `ListClustersRequest`. 1433 string next_page_token = 2 [(google.api.field_behavior) = OUTPUT_ONLY]; 1434} 1435 1436// A request to collect cluster diagnostic information. 1437message DiagnoseClusterRequest { 1438 // Required. The ID of the Google Cloud Platform project that the cluster 1439 // belongs to. 1440 string project_id = 1 [(google.api.field_behavior) = REQUIRED]; 1441 1442 // Required. The Dataproc region in which to handle the request. 1443 string region = 3 [(google.api.field_behavior) = REQUIRED]; 1444 1445 // Required. The cluster name. 1446 string cluster_name = 2 [(google.api.field_behavior) = REQUIRED]; 1447} 1448 1449// The location of diagnostic output. 1450message DiagnoseClusterResults { 1451 // Output only. The Cloud Storage URI of the diagnostic output. 1452 // The output report is a plain text file with a summary of collected 1453 // diagnostics. 1454 string output_uri = 1 [(google.api.field_behavior) = OUTPUT_ONLY]; 1455} 1456 1457// Reservation Affinity for consuming Zonal reservation. 1458message ReservationAffinity { 1459 // Indicates whether to consume capacity from an reservation or not. 1460 enum Type { 1461 TYPE_UNSPECIFIED = 0; 1462 1463 // Do not consume from any allocated capacity. 1464 NO_RESERVATION = 1; 1465 1466 // Consume any reservation available. 
1467 ANY_RESERVATION = 2; 1468 1469 // Must consume from a specific reservation. Must specify key value fields 1470 // for specifying the reservations. 1471 SPECIFIC_RESERVATION = 3; 1472 } 1473 1474 // Optional. Type of reservation to consume 1475 Type consume_reservation_type = 1 [(google.api.field_behavior) = OPTIONAL]; 1476 1477 // Optional. Corresponds to the label key of reservation resource. 1478 string key = 2 [(google.api.field_behavior) = OPTIONAL]; 1479 1480 // Optional. Corresponds to the label values of reservation resource. 1481 repeated string values = 3 [(google.api.field_behavior) = OPTIONAL]; 1482} 1483