xref: /aosp_15_r20/external/googleapis/google/cloud/dataplex/v1/processing.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2023 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.cloud.dataplex.v1;
18
19import "google/api/field_behavior.proto";
20import "google/api/resource.proto";
21
22option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
23option java_multiple_files = true;
24option java_outer_classname = "ProcessingProto";
25option java_package = "com.google.cloud.dataplex.v1";
26
27// DataScan scheduling and trigger settings.
28message Trigger {
29  // The scan runs once via the `RunDataScan` API.
30  message OnDemand {}
31
32  // The scan is scheduled to run periodically.
33  message Schedule {
34    // Required. [Cron](https://en.wikipedia.org/wiki/Cron) schedule for running
35    // scans periodically.
36    //
37    // To explicitly set a timezone in the cron expression, apply a prefix to
38    // it: **"CRON_TZ=${IANA_TIME_ZONE}"** or **"TZ=${IANA_TIME_ZONE}"**.
39    // The **${IANA_TIME_ZONE}** may only be a valid string from the IANA time
40    // zone database
41    // ([wikipedia](https://en.wikipedia.org/wiki/List_of_tz_database_time_zones#List)).
42    // For example, `CRON_TZ=America/New_York 1 * * * *`, or
43    // `TZ=America/New_York 1 * * * *`.
44    //
45    // This field is required for Schedule scans.
46    string cron = 1 [(google.api.field_behavior) = REQUIRED];
47  }
48
49  // How the scan is triggered: on demand or on a periodic schedule.
50  //
51  // If not specified, the default is `onDemand`.
52  oneof mode {
53    // The scan runs once via the `RunDataScan` API.
54    OnDemand on_demand = 100;
55
56    // The scan is scheduled to run periodically.
57    Schedule schedule = 101;
58  }
59}
60
61// The data source for DataScan.
62message DataSource {
63  // The source is required and immutable. Once set, it cannot be changed to
64  // another source.
65  oneof source {
66    // Immutable. The Dataplex entity that represents the data source (e.g.
67    // BigQuery table) for DataScan, of the form:
68    // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
69    string entity = 100 [
70      (google.api.field_behavior) = IMMUTABLE,
71      (google.api.resource_reference) = {
72        type: "dataplex.googleapis.com/Entity"
73      }
74    ];
75
76    // Immutable. The service-qualified full resource name of the cloud resource
77    // for a DataScan job to scan against. The field could be: BigQuery table of
78    // type "TABLE" for DataProfileScan/DataQualityScan. Format:
79    // `//bigquery.googleapis.com/projects/PROJECT_ID/datasets/DATASET_ID/tables/TABLE_ID`
80    string resource = 101 [(google.api.field_behavior) = IMMUTABLE];
81  }
82}
83
84// The data scanned during processing (e.g. in incremental DataScan).
85message ScannedData {
86  // A data range denoted by a pair of start/end values of a field.
87  message IncrementalField {
88    // The field that contains values which monotonically increase over time
89    // (e.g. a timestamp column).
90    string field = 1;
91
92    // Value that marks the start of the range.
93    string start = 2;
94
95    // Value that marks the end of the range.
96    string end = 3;
97  }
98
99  // The range of scanned data.
100  oneof data_range {
101    // The range denoted by values of an incremental field.
102    IncrementalField incremental_field = 1;
103  }
104}
105