// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataproc.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataproc/v1/shared.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataproc/v2/apiv1/dataprocpb;dataprocpb";
option java_multiple_files = true;
option java_outer_classname = "BatchesProto";
option java_package = "com.google.cloud.dataproc.v1";

// The BatchController provides methods to manage batch workloads.
service BatchController {
  option (google.api.default_host) = "dataproc.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Creates a batch workload that executes asynchronously.
  rpc CreateBatch(CreateBatchRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*}/batches"
      body: "batch"
    };
    option (google.api.method_signature) = "parent,batch,batch_id";
    option (google.longrunning.operation_info) = {
      response_type: "Batch"
      metadata_type: "google.cloud.dataproc.v1.BatchOperationMetadata"
    };
  }

  // Gets the batch workload resource representation.
  rpc GetBatch(GetBatchRequest) returns (Batch) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Lists batch workloads.
  rpc ListBatches(ListBatchesRequest) returns (ListBatchesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*}/batches"
    };
    option (google.api.method_signature) = "parent";
  }

  // Deletes the batch workload resource. If the batch is not in terminal state,
  // the delete fails and the response returns `FAILED_PRECONDITION`.
  rpc DeleteBatch(DeleteBatchRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/batches/*}"
    };
    option (google.api.method_signature) = "name";
  }
}

// A request to create a batch workload.
message CreateBatchRequest {
  // Required. The parent resource where this batch will be created.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Required. The batch to create.
  Batch batch = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. The ID to use for the batch, which will become the final
  // component of the batch's resource name.
  //
  // This value must be 4-63 characters. Valid characters are `/[a-z][0-9]-/`.
  string batch_id = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A unique ID used to identify the request. If the service
  // receives two
  // [CreateBatchRequest](https://cloud.google.com/dataproc/docs/reference/rpc/google.cloud.dataproc.v1#google.cloud.dataproc.v1.CreateBatchRequest)s
  // with the same request_id, the second request is ignored and the
  // Operation that corresponds to the first Batch created and stored
  // in the backend is returned.
  //
  // Recommendation: Set this value to a
  // [UUID](https://en.wikipedia.org/wiki/Universally_unique_identifier).
  //
  // The value must contain only letters (a-z, A-Z), numbers (0-9),
  // underscores (_), and hyphens (-). The maximum length is 40 characters.
  string request_id = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A request to get the resource representation for a batch workload.
message GetBatchRequest {
  // Required. The fully qualified name of the batch to retrieve
  // in the format
  // "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" }
  ];
}

// A request to list batch workloads in a project.
message ListBatchesRequest {
  // Required. The parent, which owns this collection of batches.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      child_type: "dataproc.googleapis.com/Batch"
    }
  ];

  // Optional. The maximum number of batches to return in each response.
  // The service may return fewer than this value.
  // The default page size is 20; the maximum page size is 1000.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A page token received from a previous `ListBatches` call.
  // Provide this token to retrieve the subsequent page.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. A filter for the batches to return in the response.
  //
  // A filter is a logical expression constraining the values of various fields
  // in each batch resource. Filters are case sensitive, and may contain
  // multiple clauses combined with logical operators (AND/OR).
  // Supported fields are `batch_id`, `batch_uuid`, `state`, and `create_time`.
  //
  // e.g. `state = RUNNING and create_time < "2023-01-01T00:00:00Z"`
  // filters for batches in state RUNNING that were created before 2023-01-01
  //
  // See https://google.aip.dev/assets/misc/ebnf-filtering.txt for a detailed
  // description of the filter syntax and a list of supported comparisons.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Field(s) on which to sort the list of batches.
  //
  // Currently the only supported sort orders are unspecified (empty) and
  // `create_time desc` to sort by most recently created batches first.
  //
  // See https://google.aip.dev/132#ordering for more details.
  string order_by = 5 [(google.api.field_behavior) = OPTIONAL];
}

// A list of batch workloads.
message ListBatchesResponse {
  // The batches from the specified collection.
  repeated Batch batches = 1;

  // A token, which can be sent as `page_token` to retrieve the next page.
  // If this field is omitted, there are no subsequent pages.
  string next_page_token = 2;
}

// A request to delete a batch workload.
message DeleteBatchRequest {
  // Required. The fully qualified name of the batch to delete
  // in the format
  // "projects/PROJECT_ID/locations/DATAPROC_REGION/batches/BATCH_ID"
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataproc.googleapis.com/Batch" }
  ];
}

// A representation of a batch workload in the service.
message Batch {
  option (google.api.resource) = {
    type: "dataproc.googleapis.com/Batch"
    pattern: "projects/{project}/locations/{location}/batches/{batch}"
  };

  // The batch state.
  enum State {
    // The batch state is unknown.
    STATE_UNSPECIFIED = 0;

    // The batch is created before running.
    PENDING = 1;

    // The batch is running.
    RUNNING = 2;

    // The batch is cancelling.
    CANCELLING = 3;

    // The batch cancellation was successful.
    CANCELLED = 4;

    // The batch completed successfully.
    SUCCEEDED = 5;

    // The batch is no longer running due to an error.
    FAILED = 6;
  }

  // Historical state information.
  message StateHistory {
    // Output only. The state of the batch at this point in history.
    State state = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Details about the state at this point in history.
    string state_message = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. The time when the batch entered the historical state.
    google.protobuf.Timestamp state_start_time = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The resource name of the batch.
  string name = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. A batch UUID (Unique Universal Identifier). The service
  // generates this value when it creates the batch.
  string uuid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The application/framework-specific portion of the batch configuration.
  oneof batch_config {
    // Optional. PySpark batch config.
    PySparkBatch pyspark_batch = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Spark batch config.
    SparkBatch spark_batch = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkR batch config.
    SparkRBatch spark_r_batch = 6 [(google.api.field_behavior) = OPTIONAL];

    // Optional. SparkSql batch config.
    SparkSqlBatch spark_sql_batch = 7 [(google.api.field_behavior) = OPTIONAL];
  }

  // Output only. Runtime information about batch execution.
  RuntimeInfo runtime_info = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The state of the batch.
  State state = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Batch state details, such as a failure
  // description if the state is `FAILED`.
  string state_message = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the batch entered a current state.
  google.protobuf.Timestamp state_time = 11
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The email address of the user who created the batch.
  string creator = 12 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The labels to associate with this batch.
  // Label **keys** must contain 1 to 63 characters, and must conform to
  // [RFC 1035](https://www.ietf.org/rfc/rfc1035.txt).
  // Label **values** may be empty, but, if present, must contain 1 to 63
  // characters, and must conform to [RFC
  // 1035](https://www.ietf.org/rfc/rfc1035.txt). No more than 32 labels can be
  // associated with a batch.
  map<string, string> labels = 13 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Runtime configuration for the batch execution.
  RuntimeConfig runtime_config = 14 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Environment configuration for the batch execution.
  EnvironmentConfig environment_config = 15
      [(google.api.field_behavior) = OPTIONAL];

  // Output only. The resource name of the operation associated with this batch.
  string operation = 16 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Historical state information for the batch.
  repeated StateHistory state_history = 17
      [(google.api.field_behavior) = OUTPUT_ONLY];
}

// A configuration for running an
// [Apache
// PySpark](https://spark.apache.org/docs/latest/api/python/getting_started/quickstart.html)
// batch workload.
message PySparkBatch {
  // Required. The HCFS URI of the main Python file to use as the Spark driver.
  // Must be a .py file.
  string main_python_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS file URIs of Python files to pass to the PySpark
  // framework. Supported file types: `.py`, `.egg`, and `.zip`.
  repeated string python_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running an [Apache Spark](https://spark.apache.org/)
// batch workload.
message SparkBatch {
  // The specification of the main method to call to drive the Spark
  // workload. Specify either the jar file that contains the main class or the
  // main class name. To pass both a main jar and a main class in that jar, add
  // the jar to `jar_file_uris`, and then specify the main class
  // name in `main_class`.
  oneof driver {
    // Optional. The HCFS URI of the jar file that contains the main class.
    string main_jar_file_uri = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The name of the driver main class. The jar file that contains
    // the class must be in the classpath or specified in `jar_file_uris`.
    string main_class = 2 [(google.api.field_behavior) = OPTIONAL];
  }

  // Optional. The arguments to pass to the driver. Do not include arguments
  // that can be set as batch properties, such as `--conf`, since a collision
  // can occur that causes an incorrect batch submission.
  repeated string args = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to add to the classpath of the
  // Spark driver and tasks.
  repeated string jar_file_uris = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running an
// [Apache SparkR](https://spark.apache.org/docs/latest/sparkr.html)
// batch workload.
message SparkRBatch {
  // Required. The HCFS URI of the main R file to use as the driver.
  // Must be a `.R` or `.r` file.
  string main_r_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The arguments to pass to the Spark driver. Do not include
  // arguments that can be set as batch properties, such as `--conf`, since a
  // collision can occur that causes an incorrect batch submission.
  repeated string args = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of files to be placed in the working directory of
  // each executor.
  repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of archives to be extracted into the working directory
  // of each executor. Supported file types:
  // `.jar`, `.tar`, `.tar.gz`, `.tgz`, and `.zip`.
  repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];
}

// A configuration for running
// [Apache Spark SQL](https://spark.apache.org/sql/) queries as a batch
// workload.
message SparkSqlBatch {
  // Required. The HCFS URI of the script that contains Spark SQL queries to
  // execute.
  string query_file_uri = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Mapping of query variable names to values (equivalent to the
  // Spark SQL command: `SET name="value";`).
  map<string, string> query_variables = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. HCFS URIs of jar files to be added to the Spark CLASSPATH.
  repeated string jar_file_uris = 3 [(google.api.field_behavior) = OPTIONAL];
}