// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "BatchesProto";
option java_package = "com.google.cloud.dataproc.v1";

// The BatchController provides methods to manage batch workloads.
service BatchController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a batch workload that executes asynchronously.
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
  }

  // Gets the batch workload resource representation.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the batch workload resource. If the batch is not in a terminal
  // state, the delete fails and the response returns `FAILED_PRECONDITION`.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }
}
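
// Illustrative only, not part of the API definition: given the
// `google.api.http` binding above, a `CreateBatch` call transcodes to a
// REST request like the following (project, region, and batch ID are
// hypothetical; under standard HTTP/JSON transcoding the `batch_id` field
// surfaces as the `batchId` query parameter):
//
//   POST https://dataproc.googleapis.com/v1/projects/my-project/locations/us-central1/batches?batchId=example-batch
//   (request body: the JSON-encoded `Batch` message)
//
// Per the `operation_info` annotation, the returned
// `google.longrunning.Operation` carries a `Batch` in its `response` field
// on success and `BatchOperationMetadata` in its `metadata` field.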

// A request to create a batch workload.
message CreateBatchRequest {
  // Required. The parent resource where this batch will be created.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Required. The batch to create.
  Batch batch = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ID to use for the batch, which will become the final
  // component of the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are lowercase
  // letters, numbers, and hyphens (`/[a-z][0-9]-/`).
  string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // with the same request_id, the second request is ignored and the
  // Operation that corresponds to the first Batch created and stored
  // in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}
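
// Illustrative `CreateBatchRequest` in protobuf text format (all values
// hypothetical). The `request_id` follows the UUID recommendation above, so
// a retried request is deduplicated instead of creating a second batch:
//
//   parent: "projects/my-project/locations/us-central1"
//   batch_id: "example-batch"
//   request_id: "a1b2c3d4-5678-90ab-cdef-1234567890ab"
//   batch {
//     spark_batch {
//       main_class: "com.example.WordCount"
//       jar_file_uris: "gs://my-bucket/wordcount.jar"
//     }
//   }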

// A request to get the resource representation for a batch workload.
message GetBatchRequest {
  // Required. The fully qualified name of the batch to retrieve
  // in the format
  // "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" }
  ];
}

// A request to list batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent, which owns this collection of batches.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20; the maximum page size is 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A page token received from a previous `ListBatches` call.
  // Provide this token to retrieve the subsequent page.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A filter for the batches to return in the response.
  //
  // A filter is a logical expression constraining the values of various fields
  // in each batch resource. Filters are case-sensitive, and may contain
  // multiple clauses combined with logical operators (AND/OR).
  // Supported fields are `batch_id`, `batch_uuid`, `state`, and `create_time`.
  //
  // For example, `state = RUNNING and create_time < "2023-01-01T00:00:00Z"`
  // filters for batches in a RUNNING state that were created before
  // 2023-01-01.
  //
  // See https://google.aip.dev/assets/misc/ebnf-filtering.txt for a detailed
  // description of the filter syntax and a list of supported comparisons.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Field(s) on which to sort the list of batches.
  //
  // Currently the only supported sort orders are unspecified (empty) and
  // `create_time desc` to sort by most recently created batches first.
  //
  // See https://google.aip.dev/132#ordering for more details.
  string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}
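
// Illustrative `ListBatchesRequest` in protobuf text format (hypothetical
// values), combining the filter and ordering options documented above to
// page through failed batches, newest first:
//
//   parent: "projects/my-project/locations/us-central1"
//   page_size: 50
//   filter: "state = FAILED and create_time > \"2023-01-01T00:00:00Z\""
//   order_by: "create_time desc"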

// A list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token, which can be sent as `page_token` to retrieve the next page.
  // If this field is omitted, there are no subsequent pages.
  string next_page_token = 2;
}

// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The fully qualified name of the batch to delete
  // in the format
  // "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" }
  ];
}

// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // The batch state.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch has been created and is waiting to run.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The resource name of the batch.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Universally Unique Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered its current state.
  google.protobuf.Timestamp state_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  EnvironmentConfig environment_config = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this
  // batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
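
// Illustrative `Batch` resource in protobuf text format (hypothetical
// values; the output-only fields shown here are populated by the service,
// not by the caller):
//
//   name: "projects/my-project/locations/us-central1/batches/example-batch"
//   uuid: "a1b2c3d4-5678-90ab-cdef-1234567890ab"
//   state: SUCCEEDED
//   creator: "someone@example.com"
//   labels { key: "env" value: "dev" }
//   pyspark_batch {
//     main_python_file_uri: "gs://my-bucket/wordcount.py"
//   }
//   state_history {
//     state: PENDING
//     state_start_time { seconds: 1672531200 }
//   }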

// A configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark driver.
  // Must be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
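
// Illustrative `PySparkBatch` in protobuf text format (hypothetical URIs).
// Per the `args` comment above, configuration such as `--conf` settings
// belongs in the batch's runtime properties rather than in `args`:
//
//   main_python_file_uri: "gs://my-bucket/wordcount.py"
//   args: "gs://my-bucket/input.txt"
//   args: "gs://my-bucket/output/"
//   python_file_uris: "gs://my-bucket/helpers.zip"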

// A configuration for running an [Apache Spark](https://spark.apache.org/)
// batch workload.
message SparkBatch {
  // The specification of the main method to call to drive the Spark
  // workload. Specify either the jar file that contains the main class or the
  // main class name. To pass both a main jar and a main class in that jar, add
  // the jar to `jar_file_uris`, and then specify the main class
  // name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The name of the driver main class. The jar file that contains
    // the class must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}
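
// Illustrative `SparkBatch` in protobuf text format (hypothetical values).
// Because `driver` is a oneof, set either `main_jar_file_uri` or
// `main_class`, never both; here the main class is resolved from a jar
// supplied via `jar_file_uris`, as the field comments above describe:
//
//   main_class: "com.example.WordCount"
//   jar_file_uris: "gs://my-bucket/wordcount.jar"
//   args: "gs://my-bucket/input.txt"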

// A configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the Spark driver. Do not include
  // arguments that can be set as batch properties, such as `--conf`, since a
  // collision can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
}
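
// Illustrative `SparkRBatch` in protobuf text format (hypothetical URIs):
//
//   main_r_file_uri: "gs://my-bucket/analysis.R"
//   args: "gs://my-bucket/input.csv"
//   file_uris: "gs://my-bucket/lookup-table.csv"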

// A configuration for running
// [Apache Spark SQL](https://spark.apache.org/sql/) queries as a batch
// workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to
  // execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}
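
// Illustrative `SparkSqlBatch` in protobuf text format (hypothetical URIs).
// Each `query_variables` entry behaves like `SET name="value";` in the
// executed script:
//
//   query_file_uri: "gs://my-bucket/queries.sql"
//   query_variables { key: "run_date" value: "2023-01-01" }
//   jar_file_uris: "gs://my-bucket/custom-udfs.jar"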