// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "JobsProto";
option java_package = "com.google.cloud.dataproc.v1";

// The JobController provides methods to manage jobs.
service JobController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Submits a job to a cluster.
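  //
  // For illustration, a minimal request body for this method might look like
  // the following (the project, region, cluster name, and jar URI are
  // placeholders):
  //
  //     POST /v1/projects/my-project/regions/us-central1/jobs:submit
  //     {
  //       "job": {
  //         "placement": { "clusterName": "my-cluster" },
  //         "hadoopJob": { "mainJarFileUri": "gs://my-bucket/my-job.jar" }
  //       }
  //     }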
  rpc SubmitJob(SubmitJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs:submit"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job";
  }

  // Submits a job to a cluster. Returns a long-running operation whose
  // result is the [Job][google.cloud.dataproc.v1.Job].
  rpc SubmitJobAsOperation(SubmitJobRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs:submitAsOperation"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job";
    option (google.longrunning.operation_info) = {
      response_type: "Job"
      metadata_type: "JobMetadata"
    };
  }

  // Gets the resource representation for a job in a project.
  rpc GetJob(GetJobRequest) returns (Job) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }

  // Lists regions/{region}/jobs in a project.
  rpc ListJobs(ListJobsRequest) returns (ListJobsResponse) {
    option (google.api.http) = {
      get: "/v1/projects/{project_id}/regions/{region}/jobs"
    };
    option (google.api.method_signature) = "project_id,region";
    option (google.api.method_signature) = "project_id,region,filter";
  }

  // Updates a job in a project.
  rpc UpdateJob(UpdateJobRequest) returns (Job) {
    option (google.api.http) = {
      patch: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
      body: "job"
    };
  }

  // Starts a job cancellation request. To access the job resource
  // after cancellation, call
  // [regions/{region}/jobs.list](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/list)
  // or
  // [regions/{region}/jobs.get](https://cloud.google.com/dataproc/docs/reference/rest/v1/projects.regions.jobs/get).
  rpc CancelJob(CancelJobRequest) returns (Job) {
    option (google.api.http) = {
      post: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}:cancel"
      body: "*"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }

  // Deletes the job from the project. If the job is active, the delete fails,
  // and the response returns `FAILED_PRECONDITION`.
  rpc DeleteJob(DeleteJobRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/projects/{project_id}/regions/{region}/jobs/{job_id}"
    };
    option (google.api.method_signature) = "project_id,region,job_id";
  }
}

// The runtime logging config of the job.
message LoggingConfig {
  // The Log4j level for job execution. When running an
  // [Apache Hive](https://hive.apache.org/) job, Cloud
  // Dataproc configures the Hive client to an equivalent verbosity level.
  enum Level {
    // Level is unspecified. Use default level for log4j.
    LEVEL_UNSPECIFIED = 0;

    // Use ALL level for log4j.
    ALL = 1;

    // Use TRACE level for log4j.
    TRACE = 2;

    // Use DEBUG level for log4j.
    DEBUG = 3;

    // Use INFO level for log4j.
    INFO = 4;

    // Use WARN level for log4j.
    WARN = 5;

    // Use ERROR level for log4j.
    ERROR = 6;

    // Use FATAL level for log4j.
    FATAL = 7;

    // Turn off log4j.
    OFF = 8;
  }

  // The per-package log levels for the driver. This may include the
  // "root" package name to configure the root logger.
  // Examples:
  //   'com.google = FATAL', 'root = INFO', 'org.apache = DEBUG'
  map<string, Level> driver_log_levels = 2;
}

// A Dataproc job for running
// [Apache Hadoop
// MapReduce](https://hadoop.apache.org/docs/current/hadoop-mapreduce-client/hadoop-mapreduce-client-core/MapReduceTutorial.html)
// jobs on [Apache Hadoop
// YARN](https://hadoop.apache.org/docs/r2.7.1/hadoop-yarn/hadoop-yarn-site/YARN.html).
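//
// For illustration, a minimal `hadoopJob` snippet in a job request might look
// like the following (the jar URI and arguments are placeholders):
//
//     "hadoopJob": {
//       "mainJarFileUri": "gs://my-bucket/analytics/word-count.jar",
//       "args": ["gs://my-bucket/input/", "gs://my-bucket/output/"]
//     }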
message HadoopJob {
  // Required. Indicates the location of the driver's main class. Specify
  // either the jar file that contains the main class or the main class name.
  // To specify both, add the jar file to `jar_file_uris`, and then specify
  // the main class name in this property.
  oneof driver {
    // The HCFS URI of the jar file containing the main class.
    // Examples:
    //     'gs://foo-bucket/analytics-binaries/extract-useful-metrics-mr.jar'
    //     'hdfs:/tmp/test-samples/custom-wordcount.jar'
    //     'file:///home/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file containing the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // Optional. The arguments to pass to the driver. Do not
  // include arguments, such as `-libjars` or `-Dfoo=bar`, that can be set as
  // job properties, since a collision may occur that causes an incorrect job
  // submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Jar file URIs to add to the CLASSPATHs of the
  // Hadoop driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS (Hadoop Compatible Filesystem) URIs of files to be copied
  // to the working directory of Hadoop drivers and distributed tasks. Useful
  // for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted in the working directory
  // of Hadoop drivers and tasks. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, or .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Hadoop.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site and
  // classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Spark](https://spark.apache.org/)
// applications on YARN.
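//
// For illustration, a `sparkJob` snippet that names a main class rather than
// a main jar might look like the following (the class and jar names are
// placeholders):
//
//     "sparkJob": {
//       "mainClass": "com.example.SparkWordCount",
//       "jarFileUris": ["gs://my-bucket/spark-word-count.jar"]
//     }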
message SparkJob {
  // Required. The specification of the main method to call to drive the job.
  // Specify either the jar file that contains the main class or the main class
  // name. To pass both a main jar and a main class in that jar, add the jar to
  // `CommonJob.jar_file_uris`, and then specify the main class name in
  // `main_class`.
  oneof driver {
    // The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1;

    // The name of the driver's main class. The jar file that contains the class
    // must be in the default CLASSPATH or specified in `jar_file_uris`.
    string main_class = 2;
  }

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Spark.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running
// [Apache
// PySpark](https://spark.apache.org/docs/0.9.0/python-programming-guide.html)
// applications on YARN.
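//
// For illustration, a minimal `pysparkJob` snippet might look like the
// following (the file URIs are placeholders):
//
//     "pysparkJob": {
//       "mainPythonFileUri": "gs://my-bucket/wordcount.py",
//       "pythonFileUris": ["gs://my-bucket/helpers.py"]
//     }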
message PySparkJob {
  // Required. The HCFS URI of the main Python file to use as the driver. Must
  // be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: .py, .egg, and .zip.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATHs of the
  // Python driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure PySpark.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 7 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 8 [(google.api.field_behavior) = OPTIONAL];
}

// A list of queries to run on a cluster.
message QueryList {
  // Required. The queries to execute. You do not need to end a query expression
  // with a semicolon. Multiple queries can be specified in one
  // string by separating each with a semicolon. Here is an example of a
  // Dataproc API snippet that uses a QueryList to specify a HiveJob:
  //
  //     "hiveJob": {
  //       "queryList": {
  //         "queries": [
  //           "query1",
  //           "query2",
  //           "query3;query4"
  //         ]
  //       }
  //     }
  repeated string queries = 1 [(google.api.field_behavior) = REQUIRED];
}

// A Dataproc job for running [Apache Hive](https://hive.apache.org/)
// queries on YARN.
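//
// For illustration, a `hiveJob` that reads its queries from a script file
// might look like the following (the file URI and variable are placeholders):
//
//     "hiveJob": {
//       "queryFileUri": "gs://my-bucket/queries/report.q",
//       "scriptVariables": { "run_date": "2022-01-01" }
//     }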
message HiveJob {
  // Required. The sequence of Hive queries to execute, specified as either
  // an HCFS file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains Hive queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Hive command: `SET name="value";`).
  map<string, string> script_variables = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Hive.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/hive/conf/hive-site.xml, and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATH of the
  // Hive server and Hadoop MapReduce (MR) tasks. Can contain Hive SerDes
  // and UDFs.
  repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Spark
// SQL](https://spark.apache.org/sql/) queries.
message SparkSqlJob {
  // Required. The sequence of Spark SQL queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> script_variables = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure
  // Spark SQL's SparkConf. Properties that conflict with values set by the
  // Dataproc API may be overwritten.
  map<string, string> properties = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 56 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Apache Pig](https://pig.apache.org/)
// queries on YARN.
message PigJob {
  // Required. The sequence of Pig queries to execute, specified as an HCFS
  // file URI or a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains the Pig queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Mapping of query variable names to values (equivalent to the Pig
  // command: `name=[value]`).
  map<string, string> script_variables = 4
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure Pig.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in /etc/hadoop/conf/*-site.xml,
  // /etc/pig/conf/pig.properties, and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the CLASSPATH of
  // the Pig Client and Hadoop MapReduce (MR) tasks. Can contain Pig UDFs.
  repeated string jar_file_uris = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// applications on YARN.
message SparkRJob {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a .R file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments,
  // such as `--conf`, that can be set as job properties, since a collision may
  // occur that causes an incorrect job submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor. Useful for naively parallel tasks.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // .jar, .tar, .tar.gz, .tgz, and .zip.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values, used to configure SparkR.
  // Properties that conflict with values set by the Dataproc API may be
  // overwritten. Can include properties set in
  // /etc/spark/conf/spark-defaults.conf and classes in user code.
  map<string, string> properties = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Presto](https://prestosql.io/) queries.
// **IMPORTANT**: The [Dataproc Presto Optional
// Component](https://cloud.google.com/dataproc/docs/concepts/components/presto)
// must be enabled when the cluster is created to submit a Presto job to the
// cluster.
message PrestoJob {
  // Required. The sequence of Presto queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The format in which query output will be displayed. See the
  // Presto documentation for supported output formats.
  string output_format = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Presto client tags to attach to this query.
  repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values. Used to set Presto
  // [session properties](https://prestodb.io/docs/current/sql/set-session.html).
  // Equivalent to using the `--session` flag in the Presto CLI.
  map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job for running [Trino](https://trino.io/) queries.
// **IMPORTANT**: The [Dataproc Trino Optional
// Component](https://cloud.google.com/dataproc/docs/concepts/components/trino)
// must be enabled when the cluster is created to submit a Trino job to the
// cluster.
message TrinoJob {
  // Required. The sequence of Trino queries to execute, specified as
  // either an HCFS file URI or as a list of queries.
  oneof queries {
    // The HCFS URI of the script that contains SQL queries.
    string query_file_uri = 1;

    // A list of queries.
    QueryList query_list = 2;
  }

  // Optional. Whether to continue executing queries if a query fails.
  // The default value is `false`. Setting to `true` can be useful when
  // executing independent parallel queries.
  bool continue_on_failure = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The format in which query output will be displayed. See the
  // Trino documentation for supported output formats.
  string output_format = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Trino client tags to attach to this query.
  repeated string client_tags = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A mapping of property names to values. Used to set Trino
  // [session properties](https://trino.io/docs/current/sql/set-session.html).
  // Equivalent to using the `--session` flag in the Trino CLI.
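  //
  // For example (the session property name and value shown are illustrative,
  // not a recommendation):
  //
  //     "properties": { "query_max_run_time": "1h" }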
  map<string, string> properties = 6 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The runtime log config for job execution.
  LoggingConfig logging_config = 7 [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc job placement configuration.
message JobPlacement {
  // Required. The name of the cluster where the job will be submitted.
  string cluster_name = 1 [(google.api.field_behavior) = REQUIRED];

  // Output only. A cluster UUID generated by the Dataproc service when
  // the job is submitted.
  string cluster_uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Cluster labels to identify a cluster where the job will be
  // submitted.
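  //
  // For example, to target any cluster labeled `env=staging` (the label key
  // and value are placeholders):
  //
  //     "clusterLabels": { "env": "staging" }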
  map<string, string> cluster_labels = 3
      [(google.api.field_behavior) = OPTIONAL];
}

// Dataproc job status.
message JobStatus {
  // The job state.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;

    // The job is pending; it has been submitted, but is not yet running.
    PENDING = 1;

    // Job has been received by the service and completed initial setup;
    // it will soon be submitted to the cluster.
    SETUP_DONE = 8;

    // The job is running on the cluster.
    RUNNING = 2;

    // A CancelJob request has been received, but is pending.
    CANCEL_PENDING = 3;

    // Transient in-flight resources have been canceled, and the request to
    // cancel the running job has been issued to the cluster.
    CANCEL_STARTED = 7;

    // The job cancellation was successful.
    CANCELLED = 4;

    // The job has completed successfully.
    DONE = 5;

    // The job has completed, but encountered an error.
    ERROR = 6;

    // Job attempt has failed. The detail field contains failure details for
    // this attempt.
    //
    // Applies to restartable jobs only.
    ATTEMPT_FAILURE = 9;
  }

  // The job substate.
  enum Substate {
    // The job substate is unknown.
    UNSPECIFIED = 0;

    // The Job is submitted to the agent.
    //
    // Applies to RUNNING state.
    SUBMITTED = 1;

    // The Job has been received and is awaiting execution (it may be waiting
    // for a condition to be met). See the "details" field for the reason for
    // the delay.
    //
    // Applies to RUNNING state.
    QUEUED = 2;

    // The agent-reported status is out of date, which may be caused by a
    // loss of communication between the agent and Dataproc. If the
    // agent does not send a timely update, the job will fail.
    //
    // Applies to RUNNING state.
    STALE_STATUS = 3;
  }

  // Output only. A state message specifying the overall job state.
  State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Output only. Job state details, such as an error
  // description if the state is <code>ERROR</code>.
  string details = 2 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.field_behavior) = OPTIONAL
  ];

  // Output only. The time when this state was entered.
  google.protobuf.Timestamp state_start_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional state information, which includes
  // status reported by the agent.
  Substate substate = 7 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Encapsulates the full scoping used to reference a job.
message JobReference {
  // Optional. The ID of the Google Cloud Platform project that the job belongs
  // to. If specified, must match the request project ID.
  string project_id = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The job ID, which must be unique within the project.
  //
  // The ID must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), or hyphens (-). The maximum length is 100 characters.
  //
  // If not specified by the caller, the job ID will be provided by the server.
  string job_id = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A YARN application created by a job. Application information is a subset of
// <code>org.apache.hadoop.yarn.proto.YarnProtos.ApplicationReportProto</code>.
//
// **Beta Feature**: This report is available for testing purposes only. It may
// be changed before final release.
message YarnApplication {
  // The application state, corresponding to
  // <code>YarnProtos.YarnApplicationStateProto</code>.
  enum State {
    // Status is unspecified.
    STATE_UNSPECIFIED = 0;

    // Status is NEW.
    NEW = 1;

    // Status is NEW_SAVING.
    NEW_SAVING = 2;

    // Status is SUBMITTED.
    SUBMITTED = 3;

    // Status is ACCEPTED.
    ACCEPTED = 4;

    // Status is RUNNING.
    RUNNING = 5;

    // Status is FINISHED.
    FINISHED = 6;

    // Status is FAILED.
    FAILED = 7;

    // Status is KILLED.
    KILLED = 8;
  }

  // Required. The application name.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The application state.
  State state = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The numerical progress of the application, from 1 to 100.
  float progress = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. The HTTP URL of the ApplicationMaster, HistoryServer, or
  // TimelineServer that provides application-specific information. The URL uses
  // the internal hostname, and requires a proxy server for resolution and,
  // possibly, access.
  string tracking_url = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A Dataproc job resource.
message Job {
  // Optional. The fully qualified reference to the job, which can be used to
  // obtain the equivalent REST path of the job resource. If this property
  // is not specified when a job is created, the server generates a
  // <code>job_id</code>.
  JobReference reference = 1 [(google.api.field_behavior) = OPTIONAL];

  // Required. Job information, including how, when, and where to
  // run the job.
  JobPlacement placement = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The application/framework-specific portion of the job.
  oneof type_job {
    // Optional. Job is a Hadoop job.
    HadoopJob hadoop_job = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Spark job.
    SparkJob spark_job = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a PySpark job.
    PySparkJob pyspark_job = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Hive job.
    HiveJob hive_job = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Pig job.
    PigJob pig_job = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a SparkR job.
    SparkRJob spark_r_job = 21 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a SparkSql job.
    SparkSqlJob spark_sql_job = 12 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Presto job.
    PrestoJob presto_job = 23 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Job is a Trino job.
    TrinoJob trino_job = 28 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. The job status. Additional application-specific
  // status information may be contained in the <code>type_job</code>
  // and <code>yarn_applications</code> fields.
  JobStatus status = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The previous job status.
  repeated JobStatus status_history = 13
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The collection of YARN applications spun up by this job.
  //
  // **Beta** Feature: This report is available for testing purposes only. It
  // may be changed before final release.
  repeated YarnApplication yarn_applications = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A URI pointing to the location of the stdout of the job's
  // driver program.
  string driver_output_resource_uri = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. If present, the location of miscellaneous control files
  // which may be used as part of job setup and handling. If not present,
  // control files may be placed in the same location as `driver_output_uri`.
  string driver_control_files_uri = 15
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this job.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a job.
  map<string, string> labels = 18 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Job scheduling configuration.
  JobScheduling scheduling = 20 [(google.api.field_behavior) = OPTIONAL];

  // Output only. A UUID that uniquely identifies a job within the project
  // over time. This is in contrast to a user-settable reference.job_id that
  // may be reused over time.
  string job_uuid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Indicates whether the job is completed. If the value is
  // `false`, the job is still in progress. If `true`, the job is completed,
  // and the `status.state` field indicates whether it was successful, failed,
  // or cancelled.
  bool done = 24 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Driver scheduling configuration.
  DriverSchedulingConfig driver_scheduling_config = 27
      [(google.api.field_behavior) = OPTIONAL];
}

// Driver scheduling configuration.
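//
// For illustration, a driver requesting 2 GB of memory and a single vCPU
// (the values are placeholders) might be configured as:
//
//     "driverSchedulingConfig": { "memoryMb": 2048, "vcores": 1 }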
message DriverSchedulingConfig {
  // Required. The amount of memory in MB the driver is requesting.
  int32 memory_mb = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The number of vCPUs the driver is requesting.
  int32 vcores = 2 [(google.api.field_behavior) = REQUIRED];
}

// Job scheduling options.
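//
// For illustration, a restartable job that tolerates up to five driver
// restarts per hour and twenty in total (the values are placeholders,
// within the documented maximums) might be configured as:
//
//     "scheduling": { "maxFailuresPerHour": 5, "maxFailuresTotal": 20 }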
message JobScheduling {
  // Optional. Maximum number of times per hour a driver may be restarted as
  // a result of the driver exiting with a non-zero code before the job is
  // reported failed.
  //
  // A job may be reported as thrashing if the driver exits with a non-zero code
  // four times within a 10-minute window.
  //
  // Maximum value is 10.
  //
  // **Note:** This restartable job option is not supported in Dataproc
  // [workflow templates]
  // (https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template).
  int32 max_failures_per_hour = 1 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Maximum total number of times a driver may be restarted as a
  // result of the driver exiting with a non-zero code. After the maximum number
  // is reached, the job will be reported as failed.
  //
  // Maximum value is 240.
  //
  // **Note:** Currently, this restartable job option is
  // not supported in Dataproc
  // [workflow
  // templates](https://cloud.google.com/dataproc/docs/concepts/workflows/using-workflows#adding_jobs_to_a_template).
  int32 max_failures_total = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A request to submit a job.
message SubmitJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job resource.
  Job job = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. A unique id used to identify the request. If the server
  // receives two
  // [SubmitJobRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.SubmitJobRequest)s
  // with the same id, then the second request will be ignored and the
  // first [Job][google.cloud.dataproc.v1.Job] created and stored in the backend
  // is returned.
  //
  // It is recommended to always set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The id must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Job Operation metadata.
message JobMetadata {
  // Output only. The job id.
  string job_id = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Most recent job status.
  JobStatus status = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Operation type.
  string operation_type = 3 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Job submission time.
  google.protobuf.Timestamp start_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A request to get the resource representation for a job in a project.
message GetJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}

// A request to list jobs in a project.
message ListJobsRequest {
  // A matcher that specifies categories of job states.
  enum JobStateMatcher {
    // Match all jobs, regardless of state.
    ALL = 0;

    // Only match jobs in non-terminal states: PENDING, RUNNING, or
    // CANCEL_PENDING.
    ACTIVE = 1;

    // Only match jobs in terminal states: CANCELLED, DONE, or ERROR.
    NON_ACTIVE = 2;
  }

  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 6 [(google.api.field_behavior) = REQUIRED];

  // Optional. The number of results to return in each response.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The page token, returned by a previous call, to request the
  // next page of results.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. If set, the returned jobs list includes only jobs that were
  // submitted to the named cluster.
  string cluster_name = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Specifies enumerated categories of jobs to list.
  // (default = match ALL jobs).
  //
  // If `filter` is provided, `jobStateMatcher` will be ignored.
  JobStateMatcher job_state_matcher = 5
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. A filter constraining the jobs to list. Filters are
  // case-sensitive and have the following syntax:
  //
  // [field = value] AND [field [= value]] ...
  //
  // where **field** is `status.state` or `labels.[KEY]`, and `[KEY]` is a label
  // key. **value** can be `*` to match all values.
  // `status.state` can be either `ACTIVE` or `NON_ACTIVE`.
  // Only the logical `AND` operator is supported; space-separated items are
  // treated as having an implicit `AND` operator.
  //
  // Example filter:
  //
  // status.state = ACTIVE AND labels.env = staging AND labels.starred = *
  string filter = 7 [(google.api.field_behavior) = OPTIONAL];
}

// A request to update a job.
message UpdateJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 2 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The changes to the job.
  Job job = 4 [(google.api.field_behavior) = REQUIRED];

  // Required. Specifies the path, relative to <code>Job</code>, of
  // the field to update. For example, to update the labels of a Job the
  // <code>update_mask</code> parameter would be specified as
  // <code>labels</code>, and the `PATCH` request body would specify the new
  // value. <strong>Note:</strong> Currently, <code>labels</code> is the only
  // field that can be updated.
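  //
  // For illustration, the following `PATCH` request updates only the job's
  // labels (the project, region, job ID, and label are placeholders):
  //
  //     PATCH /v1/projects/my-project/regions/us-central1/jobs/my-job?updateMask=labels
  //     { "labels": { "owner": "data-team" } }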
  google.protobuf.FieldMask update_mask = 5
      [(google.api.field_behavior) = REQUIRED];
}

// A list of jobs in a project.
message ListJobsResponse {
  // Output only. Jobs list.
  repeated Job jobs = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. This token is included in the response if there are more results
  // to fetch. To fetch additional results, provide this value as the
  // `page_token` in a subsequent <code>ListJobsRequest</code>.
  string next_page_token = 2 [(google.api.field_behavior) = OPTIONAL];
}

// A request to cancel a job.
message CancelJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}

// A request to delete a job.
message DeleteJobRequest {
  // Required. The ID of the Google Cloud Platform project that the job
  // belongs to.
  string project_id = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The Dataproc region in which to handle the request.
  string region = 3 [(google.api.field_behavior) = REQUIRED];

  // Required. The job ID.
  string job_id = 2 [(google.api.field_behavior) = REQUIRED];
}