// Source: /aosp_15_r20/external/googleapis/google/cloud/dataplex/v1/tasks.proto
// (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

// Annotation extensions: `google.api.field_behavior`, `google.api.resource`
// and `google.api.resource_reference`.
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
// Sibling Dataplex definitions in the same package.
import "google/cloud/dataplex/v1/resources.proto";
// Well-known types `google.protobuf.Duration` and `google.protobuf.Timestamp`.
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

// Code-generation options for the Go and Java targets.
option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "TasksProto";
option java_package = "com.google.cloud.dataplex.v1";

// A task represents a user-visible job.
message Task {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Task"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}"
  };

  // Configuration for the underlying infrastructure used to run workloads.
  message InfrastructureSpec {
    // Batch compute resources associated with the task.
    message BatchComputeResources {
      // Optional. Total number of job executors.
      // Executor Count should be between 2 and 100. [Default=2]
      int32 executors_count = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. Max configurable executors.
      // If max_executors_count > executors_count, then auto-scaling is enabled.
      // Max Executor Count should be between 2 and 1000. [Default=1000]
      int32 max_executors_count = 2 [(google.api.field_behavior) = OPTIONAL];
    }

    // Container Image Runtime Configuration used with Batch execution.
    message ContainerImageRuntime {
      // Optional. Container image to use.
      string image = 1 [(google.api.field_behavior) = OPTIONAL];

      // Optional. A list of Java JARs to add to the classpath.
      // Valid input includes Cloud Storage URIs to Jar binaries.
      // For example, gs://bucket-name/my/path/to/file.jar
      repeated string java_jars = 2 [(google.api.field_behavior) = OPTIONAL];

      // Optional. A list of python packages to be installed.
      // Valid formats include Cloud Storage URI to a PIP installable library.
      // For example, gs://bucket-name/my/path/to/lib.tar.gz
      repeated string python_packages = 3
          [(google.api.field_behavior) = OPTIONAL];

      // Optional. Override to common configuration of open source components
      // installed on the Dataproc cluster. The properties to set on daemon
      // config files. Property keys are specified in `prefix:property` format,
      // for example `core:hadoop.tmp.dir`. For more information, see [Cluster
      // properties](https://cloud.google.com/dataproc/docs/concepts/cluster-properties).
      map<string, string> properties = 4
          [(google.api.field_behavior) = OPTIONAL];
    }

    // Cloud VPC Network used to run the infrastructure.
    message VpcNetwork {
      // The Cloud VPC network identifier. At most one of `network` and
      // `sub_network` can be set, because both are members of this oneof.
      oneof network_name {
        // Optional. The Cloud VPC network in which the job is run. By default,
        // the Cloud VPC network named Default within the project is used.
        string network = 1 [(google.api.field_behavior) = OPTIONAL];

        // Optional. The Cloud VPC sub-network in which the job is run.
        string sub_network = 2 [(google.api.field_behavior) = OPTIONAL];
      }

      // Optional. List of network tags to apply to the job.
      repeated string network_tags = 3 [(google.api.field_behavior) = OPTIONAL];
    }

    // Hardware config.
    oneof resources {
      // Compute resources needed for a Task when using Dataproc Serverless.
      BatchComputeResources batch = 52;
    }

    // Software config.
    oneof runtime {
      // Container Image Runtime Configuration.
      ContainerImageRuntime container_image = 101;
    }

    // Networking config.
    oneof network {
      // Vpc network.
      VpcNetwork vpc_network = 150;
    }
  }

  // Task scheduling and trigger settings.
  message TriggerSpec {
    // Determines how often and when the job will run.
    enum Type {
      // Unspecified trigger type.
      TYPE_UNSPECIFIED = 0;

      // The task runs one-time shortly after Task Creation.
      ON_DEMAND = 1;

      // The task is scheduled to run periodically.
      RECURRING = 2;
    }

    // Required. Immutable. Trigger type of the user-specified Task.
    Type type = 5 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];

    // Optional. The first run of the task will be after this time.
    // If not specified, the task will run shortly after being submitted if
    // ON_DEMAND and based on the schedule if RECURRING.
    google.protobuf.Timestamp start_time = 6
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Prevent the task from executing.
    // This does not cancel already running tasks. It is intended to temporarily
    // disable RECURRING tasks.
    bool disabled = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Number of retry attempts before aborting.
    // Set to zero to never attempt to retry a failed task.
    int32 max_retries = 7 [(google.api.field_behavior) = OPTIONAL];

    // Trigger only applies for RECURRING tasks.
    oneof trigger {
      // Optional. Cron schedule (https://en.wikipedia.org/wiki/Cron) for
      // running tasks periodically. To explicitly set a timezone to the cron
      // tab, apply a prefix in the cron tab: "CRON_TZ=${IANA_TIME_ZONE}" or
      // "TZ=${IANA_TIME_ZONE}". The ${IANA_TIME_ZONE} may only be a valid
      // string from IANA time zone database. For example,
      // `CRON_TZ=America/New_York 1 * * * *`, or `TZ=America/New_York 1 * * *
      // *`. This field is required for RECURRING tasks.
      string schedule = 100 [(google.api.field_behavior) = OPTIONAL];
    }
  }

  // Execution-related settings, such as the arguments, service account and
  // project used to run the task.
  message ExecutionSpec {
    // Optional. The arguments to pass to the task.
    // The args can use placeholders of the format ${placeholder} as
    // part of key/value string. These will be interpolated before passing the
    // args to the driver. Currently supported placeholders:
    // - ${task_id}
    // - ${job_time}
    // To pass positional args, set the key as TASK_ARGS. The value should be a
    // comma-separated string of all the positional arguments. To use a
    // delimiter other than comma, refer to
    // https://cloud.google.com/sdk/gcloud/reference/topic/escaping. In case of
    // other keys being present in the args, then TASK_ARGS will be passed as
    // the last argument.
    map<string, string> args = 4 [(google.api.field_behavior) = OPTIONAL];

    // NOTE(review): `service_account` is annotated REQUIRED, yet its
    // description defines a fallback for when it is not provided. Confirm
    // which of the two is intended before relying on either.

    // Required. Service account to use to execute a task.
    // If not provided, the default Compute service account for the project is
    // used.
    string service_account = 5 [(google.api.field_behavior) = REQUIRED];

    // Optional. The project in which jobs are run. By default, the project
    // containing the Lake is used. If a project is provided, the
    // [ExecutionSpec.service_account][google.cloud.dataplex.v1.Task.ExecutionSpec.service_account]
    // must belong to this project.
    string project = 7 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The maximum duration after which the job execution is expired.
    google.protobuf.Duration max_job_execution_lifetime = 8
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. The Cloud KMS key to use for encryption, of the form:
    // `projects/{project_number}/locations/{location_id}/keyRings/{key-ring-name}/cryptoKeys/{key-name}`.
    string kms_key = 9 [(google.api.field_behavior) = OPTIONAL];
  }

  // User-specified config for running a Spark task.
  message SparkTaskConfig {
    // Required. The specification of the main method to call to drive the
    // job. Specify either the jar file that contains the main class or the
    // main class name.
    oneof driver {
      // The Cloud Storage URI of the jar file that contains the main class.
      // The execution args are passed in as a sequence of named process
      // arguments (`--key=value`).
      string main_jar_file_uri = 100;

      // The name of the driver's main class. The jar file that contains the
      // class must be in the default CLASSPATH or specified in
      // `jar_file_uris`.
      // The execution args are passed in as a sequence of named process
      // arguments (`--key=value`).
      string main_class = 101;

      // The Cloud Storage URI of the main Python file to use as the driver.
      // Must be a .py file. The execution args are passed in as a sequence of
      // named process arguments (`--key=value`).
      string python_script_file = 102;

      // A reference to a query file. This can be the Cloud Storage URI of the
      // query file or it can be the path to a SqlScript Content. The execution
      // args are used to declare a set of script variables
      // (`set key="value";`).
      string sql_script_file = 104;

      // The query text.
      // The execution args are used to declare a set of script variables
      // (`set key="value";`).
      string sql_script = 105;
    }

    // Optional. Cloud Storage URIs of files to be placed in the working
    // directory of each executor.
    repeated string file_uris = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Cloud Storage URIs of archives to be extracted into the working
    // directory of each executor. Supported file types: .jar, .tar, .tar.gz,
    // .tgz, and .zip.
    repeated string archive_uris = 4 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Infrastructure specification for the execution.
    InfrastructureSpec infrastructure_spec = 6
        [(google.api.field_behavior) = OPTIONAL];
  }

  // Config for running scheduled notebooks.
  message NotebookTaskConfig {
    // Required. Path to input notebook. This can be the Cloud Storage URI of
    // the notebook file or the path to a Notebook Content. The execution args
    // are accessible as environment variables
    // (`TASK_key=value`).
    string notebook = 4 [(google.api.field_behavior) = REQUIRED];

    // Optional. Infrastructure specification for the execution.
    InfrastructureSpec infrastructure_spec = 3
        [(google.api.field_behavior) = OPTIONAL];

    // Optional. Cloud Storage URIs of files to be placed in the working
    // directory of each executor.
    repeated string file_uris = 5 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Cloud Storage URIs of archives to be extracted into the working
    // directory of each executor. Supported file types: .jar, .tar, .tar.gz,
    // .tgz, and .zip.
    repeated string archive_uris = 6 [(google.api.field_behavior) = OPTIONAL];
  }

  // Status of the task execution (e.g. Jobs).
  message ExecutionStatus {
    // Output only. Last update time of the status.
    google.protobuf.Timestamp update_time = 3
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Latest job execution.
    Job latest_job = 9 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The relative resource name of the task, of the form:
  // projects/{project_number}/locations/{location_id}/lakes/{lake_id}/
  // tasks/{task_id}.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Task" }
  ];

  // Output only. System generated globally unique ID for the task. This ID will
  // be different if the task is deleted and re-created with the same name.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the task was created.
  google.protobuf.Timestamp create_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the task was last updated.
  google.protobuf.Timestamp update_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. Description of the task.
  string description = 5 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly display name.
  string display_name = 6 [(google.api.field_behavior) = OPTIONAL];

  // Output only. Current state of the task.
  State state = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. User-defined labels for the task.
  map<string, string> labels = 8 [(google.api.field_behavior) = OPTIONAL];

  // Required. Spec related to how often and when a task should be triggered.
  TriggerSpec trigger_spec = 100 [(google.api.field_behavior) = REQUIRED];

  // Required. Spec related to how a task is executed.
  ExecutionSpec execution_spec = 101 [(google.api.field_behavior) = REQUIRED];

  // Output only. Status of the latest task executions.
  ExecutionStatus execution_status = 201
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Task template specific user-specified config.
  oneof config {
    // Config related to running custom Spark tasks.
    SparkTaskConfig spark = 300;

    // Config related to running scheduled Notebooks.
    NotebookTaskConfig notebook = 302;
  }
}

// A job represents an instance of a task.
message Job {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Job"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/tasks/{task}/jobs/{job}"
  };

  // The underlying service used to run the job.
  enum Service {
    // Service used to run the job is unspecified.
    SERVICE_UNSPECIFIED = 0;

    // Dataproc service is used to run this job.
    DATAPROC = 1;
  }

  // Execution state of the job.
  enum State {
    // The job state is unknown.
    STATE_UNSPECIFIED = 0;

    // The job is running.
    RUNNING = 1;

    // The job is cancelling.
    CANCELLING = 2;

    // The job cancellation was successful.
    CANCELLED = 3;

    // The job completed successfully.
    SUCCEEDED = 4;

    // The job is no longer running due to an error.
    FAILED = 5;

    // The job was cancelled outside of Dataplex.
    ABORTED = 6;
  }

  // Job execution trigger.
  enum Trigger {
    // The trigger is unspecified.
    TRIGGER_UNSPECIFIED = 0;

    // The job was triggered by Dataplex based on trigger spec from task
    // definition.
    TASK_CONFIG = 1;

    // The job was triggered by the explicit call of Task API.
    RUN_REQUEST = 2;
  }

  // Output only. The relative resource name of the job, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/tasks/{task_id}/jobs/{job_id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Job" }
  ];

  // Output only. System generated globally unique ID for the job.
  string uid = 2 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the job was started.
  google.protobuf.Timestamp start_time = 3
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the job ended.
  google.protobuf.Timestamp end_time = 4
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Execution state for the job.
  State state = 5 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The number of times the job has been retried (excluding the
  // initial attempt).
  uint32 retry_count = 6 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The underlying service running a job.
  Service service = 7 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The full resource name for the job run under a particular
  // service.
  string service_job = 8 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Additional information about the current state.
  string message = 9 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. User-defined labels for the task.
  map<string, string> labels = 10 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Job execution trigger.
  Trigger trigger = 11 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Spec related to how a task is executed.
  Task.ExecutionSpec execution_spec = 100
      [(google.api.field_behavior) = OUTPUT_ONLY];
}
