// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/dataplex/v1/resources.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "TasksProto";
option java_package = "com.google.cloud.dataplex.v1";

// A task represents a user-visible job.
message Task {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Task"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}"
  };

  // Configuration of the underlying infrastructure on which workloads run.
  message InfrastructureSpec {
    // Batch compute resources associated with the task.
    message BatchComputeResources {
      // Optional. Total number of job executors.
      // The executor count should be between 2 and 100. [Default=2]
      int32 executors_count = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Maximum number of configurable executors.
      // Auto-scaling is enabled when max_executors_count > executors_count.
      // The max executor count should be between 2 and 1000. [Default=1000]
      int32 max_executors_count = 2 [(google.api.field_behavior) = OPTIONAL];
    }

    // Container Image Runtime Configuration used with Batch execution.
    message ContainerImageRuntime {
      // Optional. Container image to use.
      string image = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. A list of Java JARs to add to the classpath.
      // Valid input includes Cloud Storage URIs to JAR binaries.
      // For example, gs://bucket-name/my/path/to/file.jar
      repeated string java_jars = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. A list of Python packages to be installed.
      // Valid formats include Cloud Storage URI to a PIP installable library.
      // For example, gs://bucket-name/my/path/to/lib.tar.gz
      repeated string python_packages = 3
          [(google.api.field_behavior) = OPTIONAL];

      // Optional. Override to common configuration of open source components
      // installed on the Dataproc cluster. The properties to set on daemon
      // config files. Property keys are specified in `prefix:property` format,
      // for example `core:hadoop.tmp.dir`. For more information, see [Cluster
      // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
      map<string, string> properties = 4
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Cloud VPC Network used to run the infrastructure.
    message VpcNetwork {
      // The Cloud VPC network identifier.
      oneof network_name {
        // Optional. The Cloud VPC network in which the job is run. By default,
        // the Cloud VPC network named Default within the project is used.
        string network = 1 [(google.api.field_behavior) = OPTIONAL];

        // Optional. The Cloud VPC sub-network in which the job is run.
        string sub_network = 2 [(google.api.field_behavior) = OPTIONAL];
      }

      // Optional. List of network tags to apply to the job.
      repeated string network_tags = 3 [(google.api.field_behavior) = OPTIONAL];
    }

    // Hardware config.
    oneof resources {
      // Compute resources needed for a Task when using Dataproc Serverless.
      BatchComputeResources batch = 52;
    }

    // Software config.
    oneof runtime {
      // Container Image Runtime Configuration.
      ContainerImageRuntime container_image = 101;
    }

    // Networking config.
    oneof network {
      // VPC network.
      VpcNetwork vpc_network = 150;
    }
  }

  // Task scheduling and trigger settings.
  message TriggerSpec {
    // Determines how often and when the job will run.
    enum Type {
      // Unspecified trigger type.
      TYPE_UNSPECIFIED = 0;

      // The task runs one time, shortly after Task creation.
      ON_DEMAND = 1;

      // The task is scheduled to run periodically.
      RECURRING = 2;
    }

    // Required. Immutable. Trigger type of the user-specified Task.
    Type type = 5 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Optional. The first run of the task will be after this time.
    // If not specified, the task will run shortly after being submitted if
    // ON_DEMAND and based on the schedule if RECURRING.
    google.protobuf.Timestamp start_time = 6
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Prevent the task from executing.
    // This does not cancel already running tasks. It is intended to
    // temporarily disable RECURRING tasks.
    bool disabled = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Number of retry attempts before aborting.
    // Set to zero to never attempt to retry a failed task.
    int32 max_retries = 7 [(google.api.field_behavior) = OPTIONAL];

    // Trigger only applies for RECURRING tasks.
    oneof trigger {
      // Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for
      // running tasks periodically. To explicitly set a timezone to the cron
      // tab, apply a prefix in the cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or
      // "TZ=${IANA_TIME_ZONE}". The ${IANA_TIME_ZONE} may only be a valid
      // string from IANA time zone database. For example,
      // `CRON_TZ=America/New_York 1 * * * *`, or
      // `TZ=America/New_York 1 * * * *`.
      // This field is required for RECURRING tasks.
      string schedule = 100 [(google.api.field_behavior) = OPTIONAL];
    }
  }

  // Execution related settings, such as the arguments and service account.
  message ExecutionSpec {
    // Optional. The arguments to pass to the task.
    // The args can use placeholders of the format ${placeholder} as
    // part of key/value string. These will be interpolated before passing the
    // args to the driver. Currently supported placeholders:
    // - ${task_id}
    // - ${job_time}
    // To pass positional args, set the key as TASK_ARGS. The value should be a
    // comma-separated string of all the positional arguments. To use a
    // delimiter other than comma, refer to
    // https://cloud.google.com/sdk/gcloud/reference/topic/escaping. In case of
    // other keys being present in the args, then TASK_ARGS will be passed as
    // the last argument.
    map<string, string> args = 4 [(google.api.field_behavior) = OPTIONAL];

    // Required. Service account to use to execute a task.
    // If not provided, the default Compute service account for the project is
    // used.
    // NOTE(review): the field is annotated REQUIRED, yet the text above
    // describes a fallback when it is unset - confirm which is intended.
    string service_account = 5 [(google.api.field_behavior) = REQUIRED];

    // Optional. The project in which jobs are run. By default, the project
    // containing the Lake is used. If a project is provided, the
    // [ExecutionSpec.service_account][google.cloud.dataplex.v1.Task.ExecutionSpec.service_account]
    // must belong to this project.
    string project = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The maximum duration after which the job execution is expired.
    google.protobuf.Duration max_job_execution_lifetime = 8
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The Cloud KMS key to use for encryption, of the form:
    // `projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}`.
    string kms_key = 9 [(google.api.field_behavior) = OPTIONAL];
  }

  // User-specified config for running a Spark task.
  message SparkTaskConfig {
    // Required. The specification of the main method to call to drive the
    // job. Specify either the jar file that contains the main class or the
    // main class name.
    oneof driver {
      // The Cloud Storage URI of the jar file that contains the main class.
      // The execution args are passed in as a sequence of named process
      // arguments (`--key=value`).
      string main_jar_file_uri = 100;

      // The name of the driver's main class. The jar file that contains the
      // class must be in the default CLASSPATH or specified in
      // `jar_file_uris`.
      // The execution args are passed in as a sequence of named process
      // arguments (`--key=value`).
      // NOTE(review): this message declares no `jar_file_uris` field -
      // confirm which field the sentence above refers to.
      string main_class = 101;

      // The Cloud Storage URI of the main Python file to use as the driver.
      // Must be a .py file. The execution args are passed in as a sequence of
      // named process arguments (`--key=value`).
      string python_script_file = 102;

      // A reference to a query file. This can be the Cloud Storage URI of the
      // query file or it can be the path to a SqlScript Content. The execution
      // args are used to declare a set of script variables
      // (`set key="value";`).
      string sql_script_file = 104;

      // The query text.
      // The execution args are used to declare a set of script variables
      // (`set key="value";`).
      string sql_script = 105;
    }

    // Optional. Cloud Storage URIs of files to be placed in the working
    // directory of each executor.
    repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Cloud Storage URIs of archives to be extracted into the working
    // directory of each executor. Supported file types: .jar, .tar, .tar.gz,
    // .tgz, and .zip.
    repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Infrastructure specification for the execution.
    InfrastructureSpec infrastructure_spec = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Config for running scheduled notebooks.
  message NotebookTaskConfig {
    // Required. Path to input notebook. This can be the Cloud Storage URI of
    // the notebook file or the path to a Notebook Content. The execution args
    // are accessible as environment variables
    // (`TASK_key=value`).
    string notebook = 4 [(google.api.field_behavior) = REQUIRED];

    // Optional. Infrastructure specification for the execution.
    InfrastructureSpec infrastructure_spec = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Cloud Storage URIs of files to be placed in the working
    // directory of each executor.
    repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Cloud Storage URIs of archives to be extracted into the working
    // directory of each executor. Supported file types: .jar, .tar, .tar.gz,
    // .tgz, and .zip.
    repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
  }

  // Status of the task execution (e.g. Jobs).
  message ExecutionStatus {
    // Output only. Last update time of the status.
    google.protobuf.Timestamp update_time = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Latest job execution.
    Job latest_job = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The relative resource name of the task, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Task" }
  ];

  // Output only. System generated globally unique ID for the task. This ID will
  // be different if the task is deleted and re-created with the same name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the task was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the task was last updated.
  google.protobuf.Timestamp update_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the task.
  string description = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  string display_name = 6 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the task.
  State state = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. User-defined labels for the task.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Required. Spec related to how often and when a task should be triggered.
  TriggerSpec trigger_spec = 100 [(google.api.field_behavior) = REQUIRED];

  // Required. Spec related to how a task is executed.
  ExecutionSpec execution_spec = 101 [(google.api.field_behavior) = REQUIRED];

  // Output only. Status of the latest task executions.
  ExecutionStatus execution_status = 201
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Task template specific user-specified config.
  oneof config {
    // Config related to running custom Spark tasks.
    SparkTaskConfig spark = 300;

    // Config related to running scheduled Notebooks.
    NotebookTaskConfig notebook = 302;
  }
}

// A job represents an instance of a task.
message Job {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Job"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}/jobs/{job}"
  };

  // The underlying service used to run a job.
  enum Service {
    // Service used to run the job is unspecified.
    SERVICE_UNSPECIFIED = 0;

    // Dataproc service is used to run this job.
    DATAPROC = 1;
  }

  // Execution state of a job.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;

    // The job is running.
    RUNNING = 1;

    // The job is cancelling.
    CANCELLING = 2;

    // The job cancellation was successful.
    CANCELLED = 3;

    // The job completed successfully.
    SUCCEEDED = 4;

    // The job is no longer running due to an error.
    FAILED = 5;

    // The job was cancelled outside of Dataplex.
    ABORTED = 6;
  }

  // Job execution trigger.
  enum Trigger {
    // The trigger is unspecified.
    TRIGGER_UNSPECIFIED = 0;

    // The job was triggered by Dataplex based on trigger spec from task
    // definition.
    TASK_CONFIG = 1;

    // The job was triggered by the explicit call of Task API.
    RUN_REQUEST = 2;
  }

  // Output only. The relative resource name of the job, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}/jobs/{job_id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Job" }
  ];

  // Output only. System generated globally unique ID for the job.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the job was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the job ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the job.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The number of times the job has been retried (excluding the
  // initial attempt).
  uint32 retry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The underlying service running a job.
  Service service = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The full resource name for the job run under a particular
  // service.
  string service_job = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. User-defined labels for the task.
  map<string, string> labels = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Job execution trigger.
  Trigger trigger = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Spec related to how a task is executed.
  Task.ExecutionSpec execution_spec = 100
      [(google.api.field_behavior) = OUTPUT_ONLY];
}