1// Copyright 2022 Google LLC
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15syntax = "proto3";
16
17package google.privacy.dlp.v2;
18
19import "google/api/resource.proto";
20import "google/protobuf/timestamp.proto";
21
22option csharp_namespace = "Google.Cloud.Dlp.V2";
23option go_package = "cloud.google.com/go/dlp/apiv2/dlppb;dlppb";
24option java_multiple_files = true;
25option java_outer_classname = "DlpStorage";
26option java_package = "com.google.privacy.dlp.v2";
27option php_namespace = "Google\\Cloud\\Dlp\\V2";
28option ruby_package = "Google::Cloud::Dlp::V2";
29
30// Type of information detected by the API.
31message InfoType {
32  // Name of the information type. Either a name of your choosing when
33  // creating a CustomInfoType, or one of the names listed
34  // at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
35  // a built-in type.  When sending Cloud DLP results to Data Catalog, infoType
36  // names should conform to the pattern `[A-Za-z0-9$_-]{1,64}`.
37  string name = 1;
38
39  // Optional version name for this InfoType.
40  string version = 2;
41}
42
43// Score is a summary of all elements in the data profile.
44// A higher number means more sensitive.
45message SensitivityScore {
46  // Various score levels for resources.
47  enum SensitivityScoreLevel {
48    // Unused.
49    SENSITIVITY_SCORE_UNSPECIFIED = 0;
50
51    // No sensitive information detected. Limited access.
52    SENSITIVITY_LOW = 10;
53
54    // Medium risk - PII, potentially sensitive data, or fields with free-text
55    // data that are at higher risk of having intermittent sensitive data.
56    // Consider limiting access.
57    SENSITIVITY_MODERATE = 20;
58
59    // High risk – SPII may be present. Exfiltration of data may lead to user
60    // data loss. Re-identification of users may be possible. Consider limiting
61    // usage and or removing SPII.
62    SENSITIVITY_HIGH = 30;
63  }
64
65  // The score applied to the resource.
66  SensitivityScoreLevel score = 1;
67}
68
69// Categorization of results based on how likely they are to represent a match,
70// based on the number of elements they contain which imply a match.
71enum Likelihood {
72  // Default value; same as POSSIBLE.
73  LIKELIHOOD_UNSPECIFIED = 0;
74
75  // Few matching elements.
76  VERY_UNLIKELY = 1;
77
78  UNLIKELY = 2;
79
80  // Some matching elements.
81  POSSIBLE = 3;
82
83  LIKELY = 4;
84
85  // Many matching elements.
86  VERY_LIKELY = 5;
87}
88
89// A reference to a StoredInfoType to use with scanning.
90message StoredType {
91  // Resource name of the requested `StoredInfoType`, for example
92  // `organizations/433245324/storedInfoTypes/432452342` or
93  // `projects/project-id/storedInfoTypes/432452342`.
94  string name = 1;
95
96  // Timestamp indicating when the version of the `StoredInfoType` used for
97  // inspection was created. Output-only field, populated by the system.
98  google.protobuf.Timestamp create_time = 2;
99}
100
101// Custom information type provided by the user. Used to find domain-specific
102// sensitive information configurable to the data in question.
103message CustomInfoType {
104  // Custom information type based on a dictionary of words or phrases. This can
105  // be used to match sensitive information specific to the data, such as a list
106  // of employee IDs or job titles.
107  //
108  // Dictionary words are case-insensitive and all characters other than letters
109  // and digits in the unicode [Basic Multilingual
110  // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
111  // will be replaced with whitespace when scanning for matches, so the
112  // dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
113  // "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
114  // surrounding any match must be of a different type than the adjacent
115  // characters within the word, so letters must be next to non-letters and
116  // digits next to non-digits. For example, the dictionary word "jen" will
117  // match the first three letters of the text "jen123" but will return no
118  // matches for "jennifer".
119  //
120  // Dictionary words containing a large number of characters that are not
121  // letters or digits may result in unexpected findings because such characters
122  // are treated as whitespace. The
123  // [limits](https://cloud.google.com/dlp/limits) page contains details about
124  // the size limits of dictionaries. For dictionaries that do not fit within
125  // these constraints, consider using `LargeCustomDictionaryConfig` in the
126  // `StoredInfoType` API.
127  message Dictionary {
128    // Message defining a list of words or phrases to search for in the data.
129    message WordList {
130      // Words or phrases defining the dictionary. The dictionary must contain
131      // at least one phrase and every phrase must contain at least 2 characters
132      // that are letters or digits. [required]
133      repeated string words = 1;
134    }
135
136    oneof source {
137      // List of words or phrases to search for.
138      WordList word_list = 1;
139
140      // Newline-delimited file of words in Cloud Storage. Only a single file
141      // is accepted.
142      CloudStoragePath cloud_storage_path = 3;
143    }
144  }
145
146  // Message defining a custom regular expression.
147  message Regex {
148    // Pattern defining the regular expression. Its syntax
149    // (https://github.com/google/re2/wiki/Syntax) can be found under the
150    // google/re2 repository on GitHub.
151    string pattern = 1;
152
153    // The index of the submatch to extract as findings. When not
154    // specified, the entire match is returned. No more than 3 may be included.
155    repeated int32 group_indexes = 2;
156  }
157
158  // Message for detecting output from deidentification transformations
159  // such as
160  // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
161  // These types of transformations are
162  // those that perform pseudonymization, thereby producing a "surrogate" as
163  // output. This should be used in conjunction with a field on the
164  // transformation such as `surrogate_info_type`. This CustomInfoType does
165  // not support the use of `detection_rules`.
166  message SurrogateType {
167
168  }
169
170  // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
171  // `CustomInfoType` to alter behavior under certain circumstances, depending
172  // on the specific details of the rule. Not supported for the `surrogate_type`
173  // custom infoType.
174  message DetectionRule {
175    // Message for specifying a window around a finding to apply a detection
176    // rule.
177    message Proximity {
178      // Number of characters before the finding to consider. For tabular data,
179      // if you want to modify the likelihood of an entire column of findngs,
180      // set this to 1. For more information, see
181      // [Hotword example: Set the match likelihood of a table column]
182      // (https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
183      int32 window_before = 1;
184
185      // Number of characters after the finding to consider.
186      int32 window_after = 2;
187    }
188
189    // Message for specifying an adjustment to the likelihood of a finding as
190    // part of a detection rule.
191    message LikelihoodAdjustment {
192      oneof adjustment {
193        // Set the likelihood of a finding to a fixed value.
194        Likelihood fixed_likelihood = 1;
195
196        // Increase or decrease the likelihood by the specified number of
197        // levels. For example, if a finding would be `POSSIBLE` without the
198        // detection rule and `relative_likelihood` is 1, then it is upgraded to
199        // `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
200        // Likelihood may never drop below `VERY_UNLIKELY` or exceed
201        // `VERY_LIKELY`, so applying an adjustment of 1 followed by an
202        // adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
203        // a final likelihood of `LIKELY`.
204        int32 relative_likelihood = 2;
205      }
206    }
207
208    // The rule that adjusts the likelihood of findings within a certain
209    // proximity of hotwords.
210    message HotwordRule {
211      // Regular expression pattern defining what qualifies as a hotword.
212      Regex hotword_regex = 1;
213
214      // Range of characters within which the entire hotword must reside.
215      // The total length of the window cannot exceed 1000 characters.
216      // The finding itself will be included in the window, so that hotwords can
217      // be used to match substrings of the finding itself. Suppose you
218      // want Cloud DLP to promote the likelihood of the phone number
219      // regex "\(\d{3}\) \d{3}-\d{4}" if the area code is known to be the
220      // area code of a company's office. In this case, use the hotword regex
221      // "\(xxx\)", where "xxx" is the area code in question.
222      //
223      // For tabular data, if you want to modify the likelihood of an entire
224      // column of findngs, see
225      // [Hotword example: Set the match likelihood of a table column]
226      // (https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
227      Proximity proximity = 2;
228
229      // Likelihood adjustment to apply to all matching findings.
230      LikelihoodAdjustment likelihood_adjustment = 3;
231    }
232
233    oneof type {
234      // Hotword-based detection rule.
235      HotwordRule hotword_rule = 1;
236    }
237  }
238
239  enum ExclusionType {
240    // A finding of this custom info type will not be excluded from results.
241    EXCLUSION_TYPE_UNSPECIFIED = 0;
242
243    // A finding of this custom info type will be excluded from final results,
244    // but can still affect rule execution.
245    EXCLUSION_TYPE_EXCLUDE = 1;
246  }
247
248  // CustomInfoType can either be a new infoType, or an extension of built-in
249  // infoType, when the name matches one of existing infoTypes and that infoType
250  // is specified in `InspectContent.info_types` field. Specifying the latter
251  // adds findings to the one detected by the system. If built-in info type is
252  // not specified in `InspectContent.info_types` list then the name is treated
253  // as a custom info type.
254  InfoType info_type = 1;
255
256  // Likelihood to return for this CustomInfoType. This base value can be
257  // altered by a detection rule if the finding meets the criteria specified by
258  // the rule. Defaults to `VERY_LIKELY` if not specified.
259  Likelihood likelihood = 6;
260
261  oneof type {
262    // A list of phrases to detect as a CustomInfoType.
263    Dictionary dictionary = 2;
264
265    // Regular expression based CustomInfoType.
266    Regex regex = 3;
267
268    // Message for detecting output from deidentification transformations that
269    // support reversing.
270    SurrogateType surrogate_type = 4;
271
272    // Load an existing `StoredInfoType` resource for use in
273    // `InspectDataSource`. Not currently supported in `InspectContent`.
274    StoredType stored_type = 5;
275  }
276
277  // Set of detection rules to apply to all findings of this CustomInfoType.
278  // Rules are applied in order that they are specified. Not supported for the
279  // `surrogate_type` CustomInfoType.
280  repeated DetectionRule detection_rules = 7;
281
282  // If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
283  // to be returned. It still can be used for rules matching.
284  ExclusionType exclusion_type = 8;
285}
286
287// General identifier of a data field in a storage service.
288message FieldId {
289  // Name describing the field.
290  string name = 1;
291}
292
293// Datastore partition ID.
294// A partition ID identifies a grouping of entities. The grouping is always
295// by project and namespace, however the namespace ID may be empty.
296//
297// A partition ID contains several dimensions:
298// project ID and namespace ID.
299message PartitionId {
300  // The ID of the project to which the entities belong.
301  string project_id = 2;
302
303  // If not empty, the ID of the namespace to which the entities belong.
304  string namespace_id = 4;
305}
306
307// A representation of a Datastore kind.
308message KindExpression {
309  // The name of the kind.
310  string name = 1;
311}
312
313// Options defining a data set within Google Cloud Datastore.
314message DatastoreOptions {
315  // A partition ID identifies a grouping of entities. The grouping is always
316  // by project and namespace, however the namespace ID may be empty.
317  PartitionId partition_id = 1;
318
319  // The kind to process.
320  KindExpression kind = 2;
321}
322
323// Definitions of file type groups to scan. New types will be added to this
324// list.
325enum FileType {
326  // Includes all files.
327  FILE_TYPE_UNSPECIFIED = 0;
328
329  // Includes all file extensions not covered by another entry. Binary
330  // scanning attempts to convert the content of the file to utf_8 to scan
331  // the file.
332  // If you wish to avoid this fall back, specify one or more of the other
333  // FileType's in your storage scan.
334  BINARY_FILE = 1;
335
336  // Included file extensions:
337  //   asc,asp, aspx, brf, c, cc,cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
338  //   dat, dot, eml,, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
339  //   mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
340  //   properties, py, pyw, rb, rbw, rs, rss,  rc, scala, sh, sql, swift, tex,
341  //   shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
342  //   txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
343  TEXT_FILE = 2;
344
345  // Included file extensions:
346  //   bmp, gif, jpg, jpeg, jpe, png.
347  // bytes_limit_per_file has no effect on image files.
348  // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
349  IMAGE = 3;
350
351  // Word files >30 MB will be scanned as binary files.
352  // Included file extensions:
353  //   docx, dotx, docm, dotm
354  WORD = 5;
355
356  // PDF files >30 MB will be scanned as binary files.
357  // Included file extensions:
358  //   pdf
359  PDF = 6;
360
361  // Included file extensions:
362  //   avro
363  AVRO = 7;
364
365  // Included file extensions:
366  //   csv
367  CSV = 8;
368
369  // Included file extensions:
370  //   tsv
371  TSV = 9;
372
373  // Powerpoint files >30 MB will be scanned as binary files.
374  // Included file extensions:
375  //   pptx, pptm, potx, potm, pot
376  POWERPOINT = 11;
377
378  // Excel files >30 MB will be scanned as binary files.
379  // Included file extensions:
380  //   xlsx, xlsm, xltx, xltm
381  EXCEL = 12;
382}
383
384// Message representing a set of files in a Cloud Storage bucket. Regular
385// expressions are used to allow fine-grained control over which files in the
386// bucket to include.
387//
388// Included files are those that match at least one item in `include_regex` and
389// do not match any items in `exclude_regex`. Note that a file that matches
390// items from both lists will _not_ be included. For a match to occur, the
391// entire file path (i.e., everything in the url after the bucket name) must
392// match the regular expression.
393//
394// For example, given the input `{bucket_name: "mybucket", include_regex:
395// ["directory1/.*"], exclude_regex:
396// ["directory1/excluded.*"]}`:
397//
398// * `gs://mybucket/directory1/myfile` will be included
399// * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
400// across `/`)
401// * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
402// full path doesn't match any items in `include_regex`)
403// * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
404// matches an item in `exclude_regex`)
405//
406// If `include_regex` is left empty, it will match all files by default
407// (this is equivalent to setting `include_regex: [".*"]`).
408//
409// Some other common use cases:
410//
411// * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
412// files in `mybucket` except for .pdf files
413// * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
414// include all files directly under `gs://mybucket/directory/`, without matching
415// across `/`
416message CloudStorageRegexFileSet {
417  // The name of a Cloud Storage bucket. Required.
418  string bucket_name = 1;
419
420  // A list of regular expressions matching file paths to include. All files in
421  // the bucket that match at least one of these regular expressions will be
422  // included in the set of files, except for those that also match an item in
423  // `exclude_regex`. Leaving this field empty will match all files by default
424  // (this is equivalent to including `.*` in the list).
425  //
426  // Regular expressions use RE2
427  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
428  // under the google/re2 repository on GitHub.
429  repeated string include_regex = 2;
430
431  // A list of regular expressions matching file paths to exclude. All files in
432  // the bucket that match at least one of these regular expressions will be
433  // excluded from the scan.
434  //
435  // Regular expressions use RE2
436  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
437  // under the google/re2 repository on GitHub.
438  repeated string exclude_regex = 3;
439}
440
441// Options defining a file or a set of files within a Cloud Storage
442// bucket.
443message CloudStorageOptions {
444  // Set of files to scan.
445  message FileSet {
446    // The Cloud Storage url of the file(s) to scan, in the format
447    // `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
448    //
449    // If the url ends in a trailing slash, the bucket or directory represented
450    // by the url will be scanned non-recursively (content in sub-directories
451    // will not be scanned). This means that `gs://mybucket/` is equivalent to
452    // `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
453    // `gs://mybucket/directory/*`.
454    //
455    // Exactly one of `url` or `regex_file_set` must be set.
456    string url = 1;
457
458    // The regex-filtered set of files to scan. Exactly one of `url` or
459    // `regex_file_set` must be set.
460    CloudStorageRegexFileSet regex_file_set = 2;
461  }
462
463  // How to sample bytes if not all bytes are scanned. Meaningful only when used
464  // in conjunction with bytes_limit_per_file. If not specified, scanning would
465  // start from the top.
466  enum SampleMethod {
467    SAMPLE_METHOD_UNSPECIFIED = 0;
468
469    // Scan from the top (default).
470    TOP = 1;
471
472    // For each file larger than bytes_limit_per_file, randomly pick the offset
473    // to start scanning. The scanned bytes are contiguous.
474    RANDOM_START = 2;
475  }
476
477  // The set of one or more files to scan.
478  FileSet file_set = 1;
479
480  // Max number of bytes to scan from a file. If a scanned file's size is bigger
481  // than this value then the rest of the bytes are omitted. Only one
482  // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
483  // Cannot be set if de-identification is requested.
484  int64 bytes_limit_per_file = 4;
485
486  // Max percentage of bytes to scan from a file. The rest are omitted. The
487  // number of bytes scanned is rounded down. Must be between 0 and 100,
488  // inclusively. Both 0 and 100 means no limit. Defaults to 0. Only one
489  // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
490  // Cannot be set if de-identification is requested.
491  int32 bytes_limit_per_file_percent = 8;
492
493  // List of file type groups to include in the scan.
494  // If empty, all files are scanned and available data format processors
495  // are applied. In addition, the binary content of the selected files
496  // is always scanned as well.
497  // Images are scanned only as binary if the specified region
498  // does not support image inspection and no file_types were specified.
499  // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
500  repeated FileType file_types = 5;
501
502  SampleMethod sample_method = 6;
503
504  // Limits the number of files to scan to this percentage of the input FileSet.
505  // Number of files scanned is rounded down. Must be between 0 and 100,
506  // inclusively. Both 0 and 100 means no limit. Defaults to 0.
507  int32 files_limit_percent = 7;
508}
509
510// Message representing a set of files in Cloud Storage.
511message CloudStorageFileSet {
512  // The url, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
513  // path is allowed.
514  string url = 1;
515}
516
517// Message representing a single file or path in Cloud Storage.
518message CloudStoragePath {
519  // A url representing a file or path (no wildcards) in Cloud Storage.
520  // Example: gs://[BUCKET_NAME]/dictionary.txt
521  string path = 1;
522}
523
524// Options defining BigQuery table and row identifiers.
525message BigQueryOptions {
526  // How to sample rows if not all rows are scanned. Meaningful only when used
527  // in conjunction with either rows_limit or rows_limit_percent. If not
528  // specified, rows are scanned in the order BigQuery reads them.
529  enum SampleMethod {
530    SAMPLE_METHOD_UNSPECIFIED = 0;
531
532    // Scan groups of rows in the order BigQuery provides (default). Multiple
533    // groups of rows may be scanned in parallel, so results may not appear in
534    // the same order the rows are read.
535    TOP = 1;
536
537    // Randomly pick groups of rows to scan.
538    RANDOM_START = 2;
539  }
540
541  // Complete BigQuery table reference.
542  BigQueryTable table_reference = 1;
543
544  // Table fields that may uniquely identify a row within the table. When
545  // `actions.saveFindings.outputConfig.table` is specified, the values of
546  // columns specified here are available in the output table under
547  // `location.content_locations.record_location.record_key.id_values`. Nested
548  // fields such as `person.birthdate.year` are allowed.
549  repeated FieldId identifying_fields = 2;
550
551  // Max number of rows to scan. If the table has more rows than this value, the
552  // rest of the rows are omitted. If not set, or if set to 0, all rows will be
553  // scanned. Only one of rows_limit and rows_limit_percent can be specified.
554  // Cannot be used in conjunction with TimespanConfig.
555  int64 rows_limit = 3;
556
557  // Max percentage of rows to scan. The rest are omitted. The number of rows
558  // scanned is rounded down. Must be between 0 and 100, inclusively. Both 0 and
559  // 100 means no limit. Defaults to 0. Only one of rows_limit and
560  // rows_limit_percent can be specified. Cannot be used in conjunction with
561  // TimespanConfig.
562  int32 rows_limit_percent = 6;
563
564  SampleMethod sample_method = 4;
565
566  // References to fields excluded from scanning. This allows you to skip
567  // inspection of entire columns which you know have no findings.
568  repeated FieldId excluded_fields = 5;
569
570  // Limit scanning only to these fields.
571  repeated FieldId included_fields = 7;
572}
573
574// Shared message indicating Cloud storage type.
575message StorageConfig {
576  // Configuration of the timespan of the items to include in scanning.
577  // Currently only supported when inspecting Cloud Storage and BigQuery.
578  message TimespanConfig {
579    // Exclude files, tables, or rows older than this value.
580    // If not set, no lower time limit is applied.
581    google.protobuf.Timestamp start_time = 1;
582
583    // Exclude files, tables, or rows newer than this value.
584    // If not set, no upper time limit is applied.
585    google.protobuf.Timestamp end_time = 2;
586
587    // Specification of the field containing the timestamp of scanned items.
588    // Used for data sources like Datastore and BigQuery.
589    //
590    // <b>For BigQuery</b>
591    //
592    // If this value is not specified and the table was modified between the
593    // given start and end times, the entire table will be scanned. If this
594    // value is specified, then rows are filtered based on the given start and
595    // end times. Rows with a `NULL` value in the provided BigQuery column are
596    // skipped.
597    // Valid data types of the provided BigQuery column are: `INTEGER`, `DATE`,
598    // `TIMESTAMP`, and `DATETIME`.
599    //
600    // If your BigQuery table is [partitioned at ingestion
601    // time](https://cloud.google.com/bigquery/docs/partitioned-tables#ingestion_time),
602    // you can use any of the following pseudo-columns as your timestamp field.
603    // When used with Cloud DLP, these pseudo-column names are case sensitive.
604    //
605    // <ul>
606    // <li><code>_PARTITIONTIME</code></li>
607    // <li><code>_PARTITIONDATE</code></li>
608    // <li><code>_PARTITION_LOAD_TIME</code></li>
609    // </ul>
610    //
611    // <b>For Datastore</b>
612    //
613    // If this value is specified, then entities are filtered based on the given
614    // start and end times. If an entity does not contain the provided timestamp
615    // property or contains empty or invalid values, then it is included.
616    // Valid data types of the provided timestamp property are: `TIMESTAMP`.
617    //
618    // See the
619    // [known issue](https://cloud.google.com/dlp/docs/known-issues#bq-timespan)
620    // related to this operation.
621    FieldId timestamp_field = 3;
622
623    // When the job is started by a JobTrigger we will automatically figure out
624    // a valid start_time to avoid scanning files that have not been modified
625    // since the last time the JobTrigger executed. This will be based on the
626    // time of the execution of the last run of the JobTrigger or the timespan
627    // end_time used in the last run of the JobTrigger.
628    bool enable_auto_population_of_timespan_config = 4;
629  }
630
631  oneof type {
632    // Google Cloud Datastore options.
633    DatastoreOptions datastore_options = 2;
634
635    // Cloud Storage options.
636    CloudStorageOptions cloud_storage_options = 3;
637
638    // BigQuery options.
639    BigQueryOptions big_query_options = 4;
640
641    // Hybrid inspection options.
642    HybridOptions hybrid_options = 9;
643  }
644
645  TimespanConfig timespan_config = 6;
646}
647
648// Configuration to control jobs where the content being inspected is outside
649// of Google Cloud Platform.
650message HybridOptions {
651  // A short description of where the data is coming from. Will be stored once
652  // in the job. 256 max length.
653  string description = 1;
654
655  // These are labels that each inspection request must include within their
656  // 'finding_labels' map. Request may contain others, but any missing one of
657  // these will be rejected.
658  //
659  // Label keys must be between 1 and 63 characters long and must conform
660  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
661  //
662  // No more than 10 keys can be required.
663  repeated string required_finding_label_keys = 2;
664
665  // To organize findings, these labels will be added to each finding.
666  //
667  // Label keys must be between 1 and 63 characters long and must conform
668  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
669  //
670  // Label values must be between 0 and 63 characters long and must conform
671  // to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
672  //
673  // No more than 10 labels can be associated with a given finding.
674  //
675  // Examples:
676  // * `"environment" : "production"`
677  // * `"pipeline" : "etl"`
678  map<string, string> labels = 3;
679
680  // If the container is a table, additional information to make findings
681  // meaningful such as the columns that are primary keys.
682  TableOptions table_options = 4;
683}
684
685// Row key for identifying a record in BigQuery table.
686message BigQueryKey {
687  // Complete BigQuery table reference.
688  BigQueryTable table_reference = 1;
689
690  // Row number inferred at the time the table was scanned. This value is
691  // nondeterministic, cannot be queried, and may be null for inspection
692  // jobs. To locate findings within a table, specify
693  // `inspect_job.storage_config.big_query_options.identifying_fields` in
694  // `CreateDlpJobRequest`.
695  int64 row_number = 2;
696}
697
698// Record key for a finding in Cloud Datastore.
699message DatastoreKey {
700  // Datastore entity key.
701  Key entity_key = 1;
702}
703
704// A unique identifier for a Datastore entity.
705// If a key's partition ID or any of its path kinds or names are
706// reserved/read-only, the key is reserved/read-only.
707// A reserved/read-only key is forbidden in certain documented contexts.
708message Key {
709  // A (kind, ID/name) pair used to construct a key path.
710  //
711  // If either name or ID is set, the element is complete.
712  // If neither is set, the element is incomplete.
713  message PathElement {
714    // The kind of the entity.
715    // A kind matching regex `__.*__` is reserved/read-only.
716    // A kind must not contain more than 1500 bytes when UTF-8 encoded.
717    // Cannot be `""`.
718    string kind = 1;
719
720    // The type of ID.
721    oneof id_type {
722      // The auto-allocated ID of the entity.
723      // Never equal to zero. Values less than zero are discouraged and may not
724      // be supported in the future.
725      int64 id = 2;
726
727      // The name of the entity.
728      // A name matching regex `__.*__` is reserved/read-only.
729      // A name must not be more than 1500 bytes when UTF-8 encoded.
730      // Cannot be `""`.
731      string name = 3;
732    }
733  }
734
735  // Entities are partitioned into subsets, currently identified by a project
736  // ID and namespace ID.
737  // Queries are scoped to a single partition.
738  PartitionId partition_id = 1;
739
740  // The entity path.
741  // An entity path consists of one or more elements composed of a kind and a
742  // string or numerical identifier, which identify entities. The first
743  // element identifies a _root entity_, the second element identifies
744  // a _child_ of the root entity, the third element identifies a child of the
745  // second entity, and so forth. The entities identified by all prefixes of
746  // the path are called the element's _ancestors_.
747  //
748  // A path can never be empty, and a path can have at most 100 elements.
749  repeated PathElement path = 2;
750}
751
752// Message for a unique key indicating a record that contains a finding.
753message RecordKey {
754  oneof type {
755    DatastoreKey datastore_key = 2;
756
757    BigQueryKey big_query_key = 3;
758  }
759
760  // Values of identifying columns in the given row. Order of values matches
761  // the order of `identifying_fields` specified in the scanning request.
762  repeated string id_values = 5;
763}
764
765// Message defining the location of a BigQuery table. A table is uniquely
766// identified  by its project_id, dataset_id, and table_name. Within a query
767// a table is often referenced with a string in the format of:
768// `<project_id>:<dataset_id>.<table_id>` or
769// `<project_id>.<dataset_id>.<table_id>`.
770message BigQueryTable {
771  // The Google Cloud Platform project ID of the project containing the table.
772  // If omitted, project ID is inferred from the API call.
773  string project_id = 1;
774
775  // Dataset ID of the table.
776  string dataset_id = 2;
777
778  // Name of the table.
779  string table_id = 3;
780}
781
782// Message defining a field of a BigQuery table.
783message BigQueryField {
784  // Source table of the field.
785  BigQueryTable table = 1;
786
787  // Designated field in the BigQuery table.
788  FieldId field = 2;
789}
790
791// An entity in a dataset is a field or set of fields that correspond to a
792// single person. For example, in medical records the `EntityId` might be a
793// patient identifier, or for financial records it might be an account
794// identifier. This message is used when generalizations or analysis must take
795// into account that multiple rows correspond to the same entity.
796message EntityId {
797  // Composite key indicating which field contains the entity identifier.
798  FieldId field = 1;
799}
800
801// Instructions regarding the table content being inspected.
802message TableOptions {
803  // The columns that are the primary keys for table objects included in
804  // ContentItem. A copy of this cell's value will stored alongside alongside
805  // each finding so that the finding can be traced to the specific row it came
806  // from. No more than 3 may be provided.
807  repeated FieldId identifying_fields = 1;
808}
809