// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1";

// The JobController provides methods to manage jobs.
service JobController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Submits a job to a cluster.
  rpc SubmitJob(SubmitJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs:submit"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job";
  }

  // Submits a job to a cluster and returns a long-running operation.
  rpc SubmitJobAsOperation(SubmitJobRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs:submitAsOperation"
      body: "*"
    };
    option (google.api.method_signature) = "project_id, region, job";
    option (google.longrunning.operation_info) = {
      response_type: "Job"
      metadata_type: "JobMetadata"
    };
  }

  // Gets the resource representation for a job in a project.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }

  // Lists regions/{region}/jobs in a project.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/jobs"
    };
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Updates a job in a project.
  rpc UpdateJob(UpdateJobRequest) returns (Job) {
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
      body: "job"
    };
  }

  // Starts a job cancellation request. To access the job resource
  // after cancellation, call
  // [regions/{region}/jobs.list](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/list)
  // or
  // [regions/{region}/jobs.get](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/get).
  rpc CancelJob(CancelJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }

  // Deletes the job from the project. If the job is active, the delete fails,
  // and the response returns `FAILED_PRECONDITION`.
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }
}

// The runtime logging config of the job.
message LoggingConfig {
  // The Log4j level for job execution. When running an
  // [Apache Hive](https://hive.apache.org/) job, Cloud
  // Dataproc configures the Hive client to an equivalent verbosity level.
  enum Level {
    // Level is unspecified. Use default level for log4j.
    LEVEL_UNSPECIFIED = 0;

    // Use ALL level for log4j.
    ALL = 1;

    // Use TRACE level for log4j.
    TRACE = 2;

    // Use DEBUG level for log4j.
    DEBUG = 3;

    // Use INFO level for log4j.
    INFO = 4;

    // Use WARN level for log4j.
    WARN = 5;

    // Use ERROR level for log4j.
    ERROR = 6;

    // Use FATAL level for log4j.
    FATAL = 7;

    // Turn off log4j.
    OFF = 8;
  }

  // The per-package log levels for the driver. This may include
  // "root" package name to configure rootLogger.
  // Examples:
  //   'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
  map<string, Level> driver_log_levels = 2;
}
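
// For illustration, a possible `loggingConfig` section of a JSON job request
// might look like the following sketch (shown in the same snippet style as
// the `QueryList` example later in this file; the package names are only
// examples taken from the field comment above):
//
//     "loggingConfig": {
//       "driverLogLevels": {
//         "root": "INFO",
//         "org.apache": "DEBUG",
//         "com.google": "FATAL"
//       }
//     }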

// A Dataproc job for running
// [Apache Hadoop
// MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop
// YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
message HadoopJob {
  // Required. Indicates the location of the driver's main class. Specify
  // either the jar file that contains the main class or the main class name.
  // To specify both, add the jar file to `jar_file_uris`, and then specify
  // the main class name in this property.
  oneof driver {
    // The HCFS URI of the jar file containing the main class.
    // Examples:
    //   'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
    //   'hdfs:/tmp/test-samples/custom-wordcount.jar'
    //   'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file containing the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // Optional. The arguments to pass to the driver. Do not
  // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as
  // job properties, since a collision may occur that causes an incorrect job
  // submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Jar file URIs to add to the CLASSPATHs of the
  // Hadoop driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
  // to the working directory of Hadoop drivers and distributed tasks. Useful
  // for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted in the working directory
  // of Hadoop drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, or .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Hadoop.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site and
  // classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}
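
// For illustration, a `hadoopJob` section of a JSON job request might look
// like the following sketch (the jar URI matches the example above; the
// arguments are hypothetical):
//
//     "hadoopJob": {
//       "mainJarFileUri":
//           "gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar",
//       "args": ["wordcount", "gs://foo-bucket/input", "gs://foo-bucket/output"]
//     }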

// A Dataproc job for running [Apache Spark](https://spark.apache.org/)
// applications on YARN.
message SparkJob {
  // Required. The specification of the main method to call to drive the job.
  // Specify either the jar file that contains the main class or the main class
  // name. To pass both a main jar and a main class in that jar, add the jar to
  // `CommonJob.jar_file_uris`, and then specify the main class name in
  // `main_class`.
  oneof driver {
    // The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file that contains the
    // class must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Spark.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running
// [Apache
// PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
message PySparkJob {
  // Required. The HCFS URI of the main Python file to use as the driver. Must
  // be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: .py, .egg, and .zip.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
  // Python driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure
  // PySpark. Properties that conflict with values set by the Dataproc API may
  // be overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}
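
// For illustration, a `pysparkJob` section of a JSON job request might look
// like the following sketch (the file URI, arguments, and property value are
// hypothetical; `spark.executor.memory` is a standard Spark property):
//
//     "pysparkJob": {
//       "mainPythonFileUri": "gs://foo-bucket/scripts/job.py",
//       "args": ["--input", "gs://foo-bucket/input"],
//       "properties": {"spark.executor.memory": "4g"}
//     }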

// A list of queries to run on a cluster.
message QueryList {
  // Required. The queries to execute. You do not need to end a query
  // expression with a semicolon. Multiple queries can be specified in one
  // string by separating each with a semicolon. Here is an example of a
  // Dataproc API snippet that uses a QueryList to specify a HiveJob:
  //
  //     "hiveJob": {
  //       "queryList": {
  //         "queries": [
  //           "query1",
  //           "query2",
  //           "query3;query4",
  //         ]
  //       }
  //     }
  repeated string queries = 1 [(google.api.field_behavior) = REQUIRED];
}

// A Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
message HiveJob {
  // Required. The sequence of Hive queries to execute, specified as either
  // an HCFS file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains Hive queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Hive command: `SET name="value";`).
  map<string, string> script_variables = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names and values, used to configure Hive.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/hive/conf/hive-site.xml, and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATH of the
  // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
  // and UDFs.
  repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Spark
// SQL](https://spark.apache.org/sql/) queries.
message SparkSqlJob {
  // Required. The sequence of Spark SQL queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> script_variables = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure
  // Spark SQL's SparkConf. Properties that conflict with values set by the
  // Dataproc API may be overwritten.
  map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 56 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
  // Required. The sequence of Pig queries to execute, specified as an HCFS
  // file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains the Pig queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Mapping of query variable names to values (equivalent to the Pig
  // command: `name=[value]`).
  map<string, string> script_variables = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Pig.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/pig/conf/pig.properties, and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATH of
  // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
  repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// applications on YARN.
message SparkRJob {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a .R file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure SparkR.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Presto](https://prestosql.io/) queries.
// **IMPORTANT**: The [Dataproc Presto Optional
// Component](https://cloud.google.com/dataproc/docs/concepts/components/presto)
// must be enabled when the cluster is created to submit a Presto job to the
// cluster.
message PrestoJob {
  // Required. The sequence of Presto queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The format in which query output will be displayed. See the
  // Presto documentation for supported output formats.
  string output_format = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Presto client tags to attach to this query.
  repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values. Used to set Presto
  // [session
  // properties](https://prestodb.io/docs/current/sql/set-session.html).
  // Equivalent to using the `--session` flag in the Presto CLI.
  map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}
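
// For illustration, a `prestoJob` section of a JSON job request might look
// like the following sketch (the query, output format, and session property
// are hypothetical examples, not values prescribed by this API):
//
//     "prestoJob": {
//       "queryList": {"queries": ["SELECT * FROM system.runtime.nodes"]},
//       "outputFormat": "CSV",
//       "properties": {"query_max_run_time": "1h"}
//     }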

// A Dataproc job for running [Trino](https://trino.io/) queries.
// **IMPORTANT**: The [Dataproc Trino Optional
// Component](https://cloud.google.com/dataproc/docs/concepts/components/trino)
// must be enabled when the cluster is created to submit a Trino job to the
// cluster.
message TrinoJob {
  // Required. The sequence of Trino queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The format in which query output will be displayed. See the
  // Trino documentation for supported output formats.
  string output_format = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Trino client tags to attach to this query.
  repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values. Used to set Trino
  // [session
  // properties](https://trino.io/docs/current/sql/set-session.html).
  // Equivalent to using the `--session` flag in the Trino CLI.
  map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc job placement configuration.
message JobPlacement {
  // Required. The name of the cluster where the job will be submitted.
  string cluster_name = 1 [(google.api.field_behavior) = REQUIRED];

  // Output only. A cluster UUID generated by the Dataproc service when
  // the job is submitted.
  string cluster_uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Cluster labels to identify a cluster where the job will be
  // submitted.
  map<string, string> cluster_labels = 3
      [(google.api.field_behavior) = OPTIONAL];
}
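
// For illustration, a `placement` section of a JSON job request might look
// like the following sketch (the cluster name and label values are
// hypothetical):
//
//     "placement": {
//       "clusterName": "example-cluster",
//       "clusterLabels": {"env": "staging"}
//     }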

// Dataproc job status.
message JobStatus {
  // The job state.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;

    // The job is pending; it has been submitted, but is not yet running.
    PENDING = 1;

    // Job has been received by the service and completed initial setup;
    // it will soon be submitted to the cluster.
    SETUP_DONE = 8;

    // The job is running on the cluster.
    RUNNING = 2;

    // A CancelJob request has been received, but is pending.
    CANCEL_PENDING = 3;

    // Transient in-flight resources have been canceled, and the request to
    // cancel the running job has been issued to the cluster.
    CANCEL_STARTED = 7;

    // The job cancellation was successful.
    CANCELLED = 4;

    // The job has completed successfully.
    DONE = 5;

    // The job has completed, but encountered an error.
    ERROR = 6;

    // Job attempt has failed. The detail field contains failure details for
    // this attempt.
    //
    // Applies to restartable jobs only.
    ATTEMPT_FAILURE = 9;
  }

  // The job substate.
  enum Substate {
    // The job substate is unknown.
    UNSPECIFIED = 0;

    // The Job is submitted to the agent.
    //
    // Applies to RUNNING state.
    SUBMITTED = 1;

    // The Job has been received and is awaiting execution (it may be waiting
    // for a condition to be met). See the "details" field for the reason for
    // the delay.
    //
    // Applies to RUNNING state.
    QUEUED = 2;

    // The agent-reported status is out of date, which may be caused by a
    // loss of communication between the agent and Dataproc. If the
    // agent does not send a timely update, the job will fail.
    //
    // Applies to RUNNING state.
    STALE_STATUS = 3;
  }

  // Output only. A state message specifying the overall job state.
  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Job state details, such as an error
  // description if the state is <code>ERROR</code>.
  string details = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. The time when this state was entered.
  google.protobuf.Timestamp state_start_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional state information, which includes
  // status reported by the agent.
  Substate substate = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Encapsulates the full scoping used to reference a job.
message JobReference {
  // Optional. The ID of the Google Cloud Platform project that the job belongs
  // to. If specified, must match the request project ID.
  string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The job ID, which must be unique within the project.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), or hyphens (-). The maximum length is 100 characters.
  //
  // If not specified by the caller, the job ID will be provided by the server.
  string job_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A YARN application created by a job. Application information is a subset of
// <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message YarnApplication {
  // The application state, corresponding to
  // <code>YarnProtos.YarnApplicationStateProto</code>.
  enum State {
    // Status is unspecified.
    STATE_UNSPECIFIED = 0;

    // Status is NEW.
    NEW = 1;

    // Status is NEW_SAVING.
    NEW_SAVING = 2;

    // Status is SUBMITTED.
    SUBMITTED = 3;

    // Status is ACCEPTED.
    ACCEPTED = 4;

    // Status is RUNNING.
    RUNNING = 5;

    // Status is FINISHED.
    FINISHED = 6;

    // Status is FAILED.
    FAILED = 7;

    // Status is KILLED.
    KILLED = 8;
  }

  // Required. The application name.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The application state.
  State state = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The numerical progress of the application, from 1 to 100.
  float progress = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or
  // TimelineServer that provides application-specific information. The URL
  // uses the internal hostname, and requires a proxy server for resolution
  // and, possibly, access.
  string tracking_url = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job resource.
message Job {
  // Optional. The fully qualified reference to the job, which can be used to
  // obtain the equivalent REST path of the job resource. If this property
  // is not specified when a job is created, the server generates a
  // <code>job_id</code>.
  JobReference reference = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Job information, including how, when, and where to
  // run the job.
  JobPlacement placement = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The application/framework-specific portion of the job.
  oneof type_job {
    // Optional. Job is a Hadoop job.
    HadoopJob hadoop_job = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Spark job.
    SparkJob spark_job = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a PySpark job.
    PySparkJob pyspark_job = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Hive job.
    HiveJob hive_job = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Pig job.
    PigJob pig_job = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a SparkR job.
    SparkRJob spark_r_job = 21 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a SparkSql job.
    SparkSqlJob spark_sql_job = 12 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Presto job.
    PrestoJob presto_job = 23 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Trino job.
    TrinoJob trino_job = 28 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The job status. Additional application-specific
  // status information may be contained in the <code>type_job</code>
  // and <code>yarn_applications</code> fields.
  JobStatus status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous job status.
  repeated JobStatus status_history = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The collection of YARN applications spun up by this job.
  //
  // **Beta** Feature: This report is available for testing purposes only. It
  // may be changed before final release.
  repeated YarnApplication yarn_applications = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A URI pointing to the location of the stdout of the job's
  // driver program.
  string driver_output_resource_uri = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. If present, the location of miscellaneous control files
  // which may be used as part of job setup and handling. If not present,
  // control files may be placed in the same location as `driver_output_uri`.
  string driver_control_files_uri = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this job.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a job.
  map<string, string> labels = 18 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Job scheduling configuration.
  JobScheduling scheduling = 20 [(google.api.field_behavior) = OPTIONAL];

  // Output only. A UUID that uniquely identifies a job within the project
  // over time. This is in contrast to a user-settable reference.job_id that
  // may be reused over time.
  string job_uuid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Indicates whether the job is completed. If the value is
  // `false`, the job is still in progress. If `true`, the job is completed,
  // and the `status.state` field will indicate if it was successful, failed,
  // or cancelled.
  bool done = 24 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Driver scheduling configuration.
  DriverSchedulingConfig driver_scheduling_config = 27
      [(google.api.field_behavior) = OPTIONAL];
}

// Driver scheduling configuration.
message DriverSchedulingConfig {
  // Required. The amount of memory in MB the driver is requesting.
  int32 memory_mb = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The number of vCPUs the driver is requesting.
  int32 vcores = 2 [(google.api.field_behavior) = REQUIRED];
}

// Job scheduling options.
message JobScheduling {
  // Optional. Maximum number of times per hour a driver may be restarted as
  // a result of the driver exiting with a non-zero code before the job is
  // reported failed.
  //
  // A job may be reported as thrashing if the driver exits with a non-zero
  // code four times within a 10-minute window.
  //
  // Maximum value is 10.
  //
  // **Note:** This restartable job option is not supported in Dataproc
  // [workflow
  // templates](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template).
  int32 max_failures_per_hour = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum total number of times a driver may be restarted as a
  // result of the driver exiting with a non-zero code. After the maximum
  // number is reached, the job will be reported as failed.
  //
  // Maximum value is 240.
  //
  // **Note:** Currently, this restartable job option is
  // not supported in Dataproc
  // [workflow
  // templates](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template).
  int32 max_failures_total = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A request to submit a job.
message SubmitJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job resource.
  Job job = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique id used to identify the request. If the server
  // receives two
  // [SubmitJobRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.SubmitJobRequest)s
  // with the same id, then the second request will be ignored and the
  // first [Job][google.cloud.dataproc.v1.Job] created and stored in the
  // backend is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The id must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}
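
// For illustration, a complete `jobs:submit` request body combining the
// messages above might look like the following sketch (the cluster name and
// request id are hypothetical; the queries follow the `QueryList` example):
//
//     {
//       "job": {
//         "placement": {"clusterName": "example-cluster"},
//         "hiveJob": {"queryList": {"queries": ["query1", "query2"]}}
//       },
//       "requestId": "your-client-generated-uuid"
//     }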

// Job Operation metadata.
message JobMetadata {
  // Output only. The job id.
  string job_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Most recent job status.
  JobStatus status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Operation type.
  string operation_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Job submission time.
  google.protobuf.Timestamp start_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A request to get the resource representation for a job in a project.
message GetJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}

// A request to list jobs in a project.
message ListJobsRequest {
  // A matcher that specifies categories of job states.
  enum JobStateMatcher {
    // Match all jobs, regardless of state.
    ALL = 0;

    // Only match jobs in non-terminal states: PENDING, RUNNING, or
    // CANCEL_PENDING.
    ACTIVE = 1;

    // Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
    NON_ACTIVE = 2;
  }

  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 6 [(google.api.field_behavior) = REQUIRED];

  // Optional. The number of results to return in each response.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The page token, returned by a previous call, to request the
  // next page of results.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If set, the returned jobs list includes only jobs that were
  // submitted to the named cluster.
  string cluster_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies enumerated categories of jobs to list.
  // (default = match ALL jobs).
  //
  // If `filter` is provided, `jobStateMatcher` will be ignored.
  JobStateMatcher job_state_matcher = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A filter constraining the jobs to list. Filters are
  // case-sensitive and have the following syntax:
  //
  //     [field = value] AND [field [= value]] ...
  //
  // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a
  // label key. **value** can be `*` to match all values.
  // `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
  // Only the logical `AND` operator is supported; space-separated items are
  // treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  //     status.state = ACTIVE AND labels.env = staging AND labels.starred = *
  string filter = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A request to update a job.
message UpdateJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The changes to the job.
  Job job = 4 [(google.api.field_behavior) = REQUIRED];

  // Required. Specifies the path, relative to <code>Job</code>, of
  // the field to update. For example, to update the labels of a Job the
  // <code>update_mask</code> parameter would be specified as
  // <code>labels</code>, and the `PATCH` request body would specify the new
  // value. <strong>Note:</strong> Currently, <code>labels</code> is the only
  // field that can be updated.
  google.protobuf.FieldMask update_mask = 5
      [(google.api.field_behavior) = REQUIRED];
}

// A list of jobs in a project.
message ListJobsResponse {
  // Output only. Jobs list.
  repeated Job jobs = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. This token is included in the response if there are more results
  // to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListJobsRequest</code>.
  string next_page_token = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A request to cancel a job.
message CancelJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}

// A request to delete a job.
message DeleteJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}