// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.privacy.dlp.v2;

import "google/api/resource.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.Dlp.V2";
option go_package = "cloud.google.com/go/dlp/apiv2/dlppb;dlppb";
option java_multiple_files = true;
option java_outer_classname = "DlpStorage";
option java_package = "com.google.privacy.dlp.v2";
option php_namespace = "Google\\Cloud\\Dlp\\V2";
option ruby_package = "Google::Cloud::Dlp::V2";

// Type of information detected by the API.
message InfoType {
  // Name of the information type. Either a name of your choosing when
  // creating a CustomInfoType, or one of the names listed
  // at https://cloud.google.com/dlp/docs/infotypes-reference when specifying
  // a built-in type. When sending Cloud DLP results to Data Catalog, infoType
  // names should conform to the pattern `[A-Za-z0-9$_-]{1,64}`.
  string name = 1;

  // Optional version name for this InfoType.
  string version = 2;
}

// Score is a summary of all elements in the data profile.
// A higher number means more sensitive.
message SensitivityScore {
  // Various score levels for resources.
  enum SensitivityScoreLevel {
    // Unused.
    SENSITIVITY_SCORE_UNSPECIFIED = 0;

    // No sensitive information detected. Limited access.
    SENSITIVITY_LOW = 10;

    // Medium risk - PII, potentially sensitive data, or fields with free-text
    // data that are at higher risk of having intermittent sensitive data.
    // Consider limiting access.
    SENSITIVITY_MODERATE = 20;

    // High risk - SPII may be present. Exfiltration of data may lead to user
    // data loss. Re-identification of users may be possible. Consider limiting
    // usage and/or removing SPII.
    SENSITIVITY_HIGH = 30;
  }

  // The score applied to the resource.
  SensitivityScoreLevel score = 1;
}

// Categorization of results based on how likely they are to represent a match,
// based on the number of elements they contain which imply a match.
enum Likelihood {
  // Default value; same as POSSIBLE.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Few matching elements.
  VERY_UNLIKELY = 1;

  // One level above `VERY_UNLIKELY` and one level below `POSSIBLE`.
  UNLIKELY = 2;

  // Some matching elements.
  POSSIBLE = 3;

  // One level above `POSSIBLE` and one level below `VERY_LIKELY`.
  LIKELY = 4;

  // Many matching elements.
  VERY_LIKELY = 5;
}

// A reference to a StoredInfoType to use with scanning.
message StoredType {
  // Resource name of the requested `StoredInfoType`, for example
  // `organizations/433245324/storedInfoTypes/432452342` or
  // `projects/project-id/storedInfoTypes/432452342`.
  string name = 1;

  // Timestamp indicating when the version of the `StoredInfoType` used for
  // inspection was created. Output-only field, populated by the system.
  google.protobuf.Timestamp create_time = 2;
}

// Custom information type provided by the user. Used to find domain-specific
// sensitive information configurable to the data in question.
message CustomInfoType {
  // Custom information type based on a dictionary of words or phrases. This can
  // be used to match sensitive information specific to the data, such as a list
  // of employee IDs or job titles.
  //
  // Dictionary words are case-insensitive and all characters other than letters
  // and digits in the Unicode [Basic Multilingual
  // Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
  // will be replaced with whitespace when scanning for matches, so the
  // dictionary phrase "Sam Johnson" will match all three phrases "sam johnson",
  // "Sam, Johnson", and "Sam (Johnson)". Additionally, the characters
  // surrounding any match must be of a different type than the adjacent
  // characters within the word, so letters must be next to non-letters and
  // digits next to non-digits. For example, the dictionary word "jen" will
  // match the first three letters of the text "jen123" but will return no
  // matches for "jennifer".
  //
  // Dictionary words containing a large number of characters that are not
  // letters or digits may result in unexpected findings because such characters
  // are treated as whitespace. The
  // [limits](https://cloud.google.com/dlp/limits) page contains details about
  // the size limits of dictionaries. For dictionaries that do not fit within
  // these constraints, consider using `LargeCustomDictionaryConfig` in the
  // `StoredInfoType` API.
  message Dictionary {
    // Message defining a list of words or phrases to search for in the data.
    message WordList {
      // Words or phrases defining the dictionary. The dictionary must contain
      // at least one phrase and every phrase must contain at least 2 characters
      // that are letters or digits. [required]
      repeated string words = 1;
    }

    // Where the dictionary's words come from: either an inline list or a file
    // in Cloud Storage.
    oneof source {
      // List of words or phrases to search for.
      WordList word_list = 1;

      // Newline-delimited file of words in Cloud Storage. Only a single file
      // is accepted.
      CloudStoragePath cloud_storage_path = 3;
    }
  }

  // Message defining a custom regular expression.
  message Regex {
    // Pattern defining the regular expression. Its syntax
    // (https://github.com/google/re2/wiki/Syntax) can be found under the
    // google/re2 repository on GitHub.
    string pattern = 1;

    // The index of the submatch to extract as findings. When not
    // specified, the entire match is returned. No more than 3 may be included.
    repeated int32 group_indexes = 2;
  }

  // Message for detecting output from deidentification transformations
  // such as
  // [`CryptoReplaceFfxFpeConfig`](https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig).
  // These types of transformations are
  // those that perform pseudonymization, thereby producing a "surrogate" as
  // output. This should be used in conjunction with a field on the
  // transformation such as `surrogate_info_type`. This CustomInfoType does
  // not support the use of `detection_rules`.
  message SurrogateType {}

  // Deprecated; use `InspectionRuleSet` instead. Rule for modifying a
  // `CustomInfoType` to alter behavior under certain circumstances, depending
  // on the specific details of the rule. Not supported for the `surrogate_type`
  // custom infoType.
  message DetectionRule {
    // Message for specifying a window around a finding to apply a detection
    // rule.
    message Proximity {
      // Number of characters before the finding to consider. For tabular data,
      // if you want to modify the likelihood of an entire column of findings,
      // set this to 1. For more information, see
      // [Hotword example: Set the match likelihood of a table column]
      // (https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
      int32 window_before = 1;

      // Number of characters after the finding to consider.
      int32 window_after = 2;
    }

    // Message for specifying an adjustment to the likelihood of a finding as
    // part of a detection rule.
    message LikelihoodAdjustment {
      // The adjustment to apply: either a fixed likelihood or a relative
      // change in likelihood levels.
      oneof adjustment {
        // Set the likelihood of a finding to a fixed value.
        Likelihood fixed_likelihood = 1;

        // Increase or decrease the likelihood by the specified number of
        // levels. For example, if a finding would be `POSSIBLE` without the
        // detection rule and `relative_likelihood` is 1, then it is upgraded to
        // `LIKELY`, while a value of -1 would downgrade it to `UNLIKELY`.
        // Likelihood may never drop below `VERY_UNLIKELY` or exceed
        // `VERY_LIKELY`, so applying an adjustment of 1 followed by an
        // adjustment of -1 when base likelihood is `VERY_LIKELY` will result in
        // a final likelihood of `LIKELY`.
        int32 relative_likelihood = 2;
      }
    }

    // The rule that adjusts the likelihood of findings within a certain
    // proximity of hotwords.
    message HotwordRule {
      // Regular expression pattern defining what qualifies as a hotword.
      Regex hotword_regex = 1;

      // Range of characters within which the entire hotword must reside.
      // The total length of the window cannot exceed 1000 characters.
      // The finding itself will be included in the window, so that hotwords can
      // be used to match substrings of the finding itself. Suppose you
      // want Cloud DLP to promote the likelihood of the phone number
      // regex "\(\d{3}\) \d{3}-\d{4}" if the area code is known to be the
      // area code of a company's office. In this case, use the hotword regex
      // "\(xxx\)", where "xxx" is the area code in question.
      //
      // For tabular data, if you want to modify the likelihood of an entire
      // column of findings, see
      // [Hotword example: Set the match likelihood of a table column]
      // (https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
      Proximity proximity = 2;

      // Likelihood adjustment to apply to all matching findings.
      LikelihoodAdjustment likelihood_adjustment = 3;
    }

    // The kind of detection rule. Only a hotword-based rule is defined here.
    oneof type {
      // Hotword-based detection rule.
      HotwordRule hotword_rule = 1;
    }
  }

  // Controls whether findings of a custom info type are excluded from the
  // final results.
  enum ExclusionType {
    // A finding of this custom info type will not be excluded from results.
    EXCLUSION_TYPE_UNSPECIFIED = 0;

    // A finding of this custom info type will be excluded from final results,
    // but can still affect rule execution.
    EXCLUSION_TYPE_EXCLUDE = 1;
  }

  // CustomInfoType can either be a new infoType, or an extension of built-in
  // infoType, when the name matches one of existing infoTypes and that infoType
  // is specified in `InspectContent.info_types` field. Specifying the latter
  // adds findings to the one detected by the system. If built-in info type is
  // not specified in `InspectContent.info_types` list then the name is treated
  // as a custom info type.
  InfoType info_type = 1;

  // Likelihood to return for this CustomInfoType. This base value can be
  // altered by a detection rule if the finding meets the criteria specified by
  // the rule. Defaults to `VERY_LIKELY` if not specified.
  Likelihood likelihood = 6;

  // How this CustomInfoType detects findings. At most one of the following
  // can be set.
  oneof type {
    // A list of phrases to detect as a CustomInfoType.
    Dictionary dictionary = 2;

    // Regular expression based CustomInfoType.
    Regex regex = 3;

    // Message for detecting output from deidentification transformations that
    // support reversing.
    SurrogateType surrogate_type = 4;

    // Load an existing `StoredInfoType` resource for use in
    // `InspectDataSource`. Not currently supported in `InspectContent`.
    StoredType stored_type = 5;
  }

  // Set of detection rules to apply to all findings of this CustomInfoType.
  // Rules are applied in order that they are specified. Not supported for the
  // `surrogate_type` CustomInfoType.
  repeated DetectionRule detection_rules = 7;

  // If set to EXCLUSION_TYPE_EXCLUDE this infoType will not cause a finding
  // to be returned. It still can be used for rules matching.
  ExclusionType exclusion_type = 8;
}

// General identifier of a data field in a storage service.
message FieldId {
  // Name describing the field.
  string name = 1;
}

// Datastore partition ID.
// A partition ID identifies a grouping of entities. The grouping is always
// by project and namespace, however the namespace ID may be empty.
//
// A partition ID contains several dimensions:
// project ID and namespace ID.
message PartitionId {
  // The ID of the project to which the entities belong.
  string project_id = 2;

  // If not empty, the ID of the namespace to which the entities belong.
  string namespace_id = 4;
}

// A representation of a Datastore kind.
message KindExpression {
  // The name of the kind.
  string name = 1;
}

// Options defining a data set within Google Cloud Datastore.
message DatastoreOptions {
  // A partition ID identifies a grouping of entities. The grouping is always
  // by project and namespace, however the namespace ID may be empty.
  PartitionId partition_id = 1;

  // The kind to process.
  KindExpression kind = 2;
}

// Definitions of file type groups to scan. New types will be added to this
// list.
enum FileType {
  // Includes all files.
  FILE_TYPE_UNSPECIFIED = 0;

  // Includes all file extensions not covered by another entry. Binary
  // scanning attempts to convert the content of the file to utf_8 to scan
  // the file.
  // If you wish to avoid this fall back, specify one or more of the other
  // FileType's in your storage scan.
  BINARY_FILE = 1;

  // Included file extensions:
  //   asc, asp, aspx, brf, c, cc, cfm, cgi, cpp, csv, cxx, c++, cs, css, dart,
  //   dat, dot, eml, epbub, ged, go, h, hh, hpp, hxx, h++, hs, html, htm,
  //   mkd, markdown, m, ml, mli, perl, pl, plist, pm, php, phtml, pht,
  //   properties, py, pyw, rb, rbw, rs, rss, rc, scala, sh, sql, swift, tex,
  //   shtml, shtm, xhtml, lhs, ics, ini, java, js, json, kix, kml, ocaml, md,
  //   txt, text, tsv, vb, vcard, vcs, wml, xcodeproj, xml, xsl, xsd, yml, yaml.
  TEXT_FILE = 2;

  // Included file extensions:
  //   bmp, gif, jpg, jpeg, jpe, png.
  // bytes_limit_per_file has no effect on image files.
  // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
  IMAGE = 3;

  // Word files >30 MB will be scanned as binary files.
  // Included file extensions:
  //   docx, dotx, docm, dotm
  WORD = 5;

  // PDF files >30 MB will be scanned as binary files.
  // Included file extensions:
  //   pdf
  PDF = 6;

  // Included file extensions:
  //   avro
  AVRO = 7;

  // Included file extensions:
  //   csv
  CSV = 8;

  // Included file extensions:
  //   tsv
  TSV = 9;

  // Powerpoint files >30 MB will be scanned as binary files.
  // Included file extensions:
  //   pptx, pptm, potx, potm, pot
  POWERPOINT = 11;

  // Excel files >30 MB will be scanned as binary files.
  // Included file extensions:
  //   xlsx, xlsm, xltx, xltm
  EXCEL = 12;
}

// Message representing a set of files in a Cloud Storage bucket. Regular
// expressions are used to allow fine-grained control over which files in the
// bucket to include.
//
// Included files are those that match at least one item in `include_regex` and
// do not match any items in `exclude_regex`. Note that a file that matches
// items from both lists will _not_ be included. For a match to occur, the
// entire file path (i.e., everything in the url after the bucket name) must
// match the regular expression.
//
// For example, given the input `{bucket_name: "mybucket", include_regex:
// ["directory1/.*"], exclude_regex:
// ["directory1/excluded.*"]}`:
//
// * `gs://mybucket/directory1/myfile` will be included
// * `gs://mybucket/directory1/directory2/myfile` will be included (`.*` matches
// across `/`)
// * `gs://mybucket/directory0/directory1/myfile` will _not_ be included (the
// full path doesn't match any items in `include_regex`)
// * `gs://mybucket/directory1/excludedfile` will _not_ be included (the path
// matches an item in `exclude_regex`)
//
// If `include_regex` is left empty, it will match all files by default
// (this is equivalent to setting `include_regex: [".*"]`).
//
// Some other common use cases:
//
// * `{bucket_name: "mybucket", exclude_regex: [".*\.pdf"]}` will include all
// files in `mybucket` except for .pdf files
// * `{bucket_name: "mybucket", include_regex: ["directory/[^/]+"]}` will
// include all files directly under `gs://mybucket/directory/`, without matching
// across `/`
message CloudStorageRegexFileSet {
  // The name of a Cloud Storage bucket. Required.
  string bucket_name = 1;

  // A list of regular expressions matching file paths to include. All files in
  // the bucket that match at least one of these regular expressions will be
  // included in the set of files, except for those that also match an item in
  // `exclude_regex`. Leaving this field empty will match all files by default
  // (this is equivalent to including `.*` in the list).
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string include_regex = 2;

  // A list of regular expressions matching file paths to exclude. All files in
  // the bucket that match at least one of these regular expressions will be
  // excluded from the scan.
  //
  // Regular expressions use RE2
  // [syntax](https://github.com/google/re2/wiki/Syntax); a guide can be found
  // under the google/re2 repository on GitHub.
  repeated string exclude_regex = 3;
}

// Options defining a file or a set of files within a Cloud Storage
// bucket.
message CloudStorageOptions {
  // Set of files to scan.
  message FileSet {
    // The Cloud Storage url of the file(s) to scan, in the format
    // `gs://<bucket>/<path>`. Trailing wildcard in the path is allowed.
    //
    // If the url ends in a trailing slash, the bucket or directory represented
    // by the url will be scanned non-recursively (content in sub-directories
    // will not be scanned). This means that `gs://mybucket/` is equivalent to
    // `gs://mybucket/*`, and `gs://mybucket/directory/` is equivalent to
    // `gs://mybucket/directory/*`.
    //
    // Exactly one of `url` or `regex_file_set` must be set.
    string url = 1;

    // The regex-filtered set of files to scan. Exactly one of `url` or
    // `regex_file_set` must be set.
    CloudStorageRegexFileSet regex_file_set = 2;
  }

  // How to sample bytes if not all bytes are scanned. Meaningful only when used
  // in conjunction with bytes_limit_per_file. If not specified, scanning would
  // start from the top.
  enum SampleMethod {
    // No sampling method specified; scanning starts from the top.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan from the top (default).
    TOP = 1;

    // For each file larger than bytes_limit_per_file, randomly pick the offset
    // to start scanning. The scanned bytes are contiguous.
    RANDOM_START = 2;
  }

  // The set of one or more files to scan.
  FileSet file_set = 1;

  // Max number of bytes to scan from a file. If a scanned file's size is bigger
  // than this value then the rest of the bytes are omitted. Only one
  // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
  // Cannot be set if de-identification is requested.
  int64 bytes_limit_per_file = 4;

  // Max percentage of bytes to scan from a file. The rest are omitted. The
  // number of bytes scanned is rounded down. Must be between 0 and 100,
  // inclusive. Both 0 and 100 mean no limit. Defaults to 0. Only one
  // of bytes_limit_per_file and bytes_limit_per_file_percent can be specified.
  // Cannot be set if de-identification is requested.
  int32 bytes_limit_per_file_percent = 8;

  // List of file type groups to include in the scan.
  // If empty, all files are scanned and available data format processors
  // are applied. In addition, the binary content of the selected files
  // is always scanned as well.
  // Images are scanned only as binary if the specified region
  // does not support image inspection and no file_types were specified.
  // Image inspection is restricted to 'global', 'us', 'asia', and 'europe'.
  repeated FileType file_types = 5;

  // How to sample bytes when not all bytes of a file are scanned. Meaningful
  // only when used in conjunction with bytes_limit_per_file.
  SampleMethod sample_method = 6;

  // Limits the number of files to scan to this percentage of the input FileSet.
  // Number of files scanned is rounded down. Must be between 0 and 100,
  // inclusive. Both 0 and 100 mean no limit. Defaults to 0.
  int32 files_limit_percent = 7;
}

// Message representing a set of files in Cloud Storage.
message CloudStorageFileSet {
  // The url, in the format `gs://<bucket>/<path>`. Trailing wildcard in the
  // path is allowed.
  string url = 1;
}

// Message representing a single file or path in Cloud Storage.
message CloudStoragePath {
  // A url representing a file or path (no wildcards) in Cloud Storage.
  // Example: gs://[BUCKET_NAME]/dictionary.txt
  string path = 1;
}

// Options defining BigQuery table and row identifiers.
message BigQueryOptions {
  // How to sample rows if not all rows are scanned. Meaningful only when used
  // in conjunction with either rows_limit or rows_limit_percent. If not
  // specified, rows are scanned in the order BigQuery reads them.
  enum SampleMethod {
    // No sampling method specified; rows are scanned in the order BigQuery
    // reads them.
    SAMPLE_METHOD_UNSPECIFIED = 0;

    // Scan groups of rows in the order BigQuery provides (default). Multiple
    // groups of rows may be scanned in parallel, so results may not appear in
    // the same order the rows are read.
    TOP = 1;

    // Randomly pick groups of rows to scan.
    RANDOM_START = 2;
  }

  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // Table fields that may uniquely identify a row within the table. When
  // `actions.saveFindings.outputConfig.table` is specified, the values of
  // columns specified here are available in the output table under
  // `location.content_locations.record_location.record_key.id_values`. Nested
  // fields such as `person.birthdate.year` are allowed.
  repeated FieldId identifying_fields = 2;

  // Max number of rows to scan. If the table has more rows than this value, the
  // rest of the rows are omitted. If not set, or if set to 0, all rows will be
  // scanned. Only one of rows_limit and rows_limit_percent can be specified.
  // Cannot be used in conjunction with TimespanConfig.
  int64 rows_limit = 3;

  // Max percentage of rows to scan. The rest are omitted. The number of rows
  // scanned is rounded down. Must be between 0 and 100, inclusive. Both 0 and
  // 100 mean no limit. Defaults to 0. Only one of rows_limit and
  // rows_limit_percent can be specified. Cannot be used in conjunction with
  // TimespanConfig.
  int32 rows_limit_percent = 6;

  // How to sample rows when not all rows are scanned. Meaningful only when
  // used in conjunction with either rows_limit or rows_limit_percent.
  SampleMethod sample_method = 4;

  // References to fields excluded from scanning. This allows you to skip
  // inspection of entire columns which you know have no findings.
  repeated FieldId excluded_fields = 5;

  // Limit scanning only to these fields.
  repeated FieldId included_fields = 7;
}

// Shared message indicating Cloud storage type.
message StorageConfig {
  // Configuration of the timespan of the items to include in scanning.
  // Currently only supported when inspecting Cloud Storage and BigQuery.
  message TimespanConfig {
    // Exclude files, tables, or rows older than this value.
    // If not set, no lower time limit is applied.
    google.protobuf.Timestamp start_time = 1;

    // Exclude files, tables, or rows newer than this value.
    // If not set, no upper time limit is applied.
    google.protobuf.Timestamp end_time = 2;

    // Specification of the field containing the timestamp of scanned items.
    // Used for data sources like Datastore and BigQuery.
    //
    // <b>For BigQuery</b>
    //
    // If this value is not specified and the table was modified between the
    // given start and end times, the entire table will be scanned. If this
    // value is specified, then rows are filtered based on the given start and
    // end times. Rows with a `NULL` value in the provided BigQuery column are
    // skipped.
    // Valid data types of the provided BigQuery column are: `INTEGER`, `DATE`,
    // `TIMESTAMP`, and `DATETIME`.
    //
    // If your BigQuery table is [partitioned at ingestion
    // time](https://cloud.google.com/bigquery/docs/partitioned-tables#ingestion_time),
    // you can use any of the following pseudo-columns as your timestamp field.
    // When used with Cloud DLP, these pseudo-column names are case sensitive.
    //
    // <ul>
    // <li><code>_PARTITIONTIME</code></li>
    // <li><code>_PARTITIONDATE</code></li>
    // <li><code>_PARTITION_LOAD_TIME</code></li>
    // </ul>
    //
    // <b>For Datastore</b>
    //
    // If this value is specified, then entities are filtered based on the given
    // start and end times. If an entity does not contain the provided timestamp
    // property or contains empty or invalid values, then it is included.
    // Valid data types of the provided timestamp property are: `TIMESTAMP`.
    //
    // See the
    // [known issue](https://cloud.google.com/dlp/docs/known-issues#bq-timespan)
    // related to this operation.
    FieldId timestamp_field = 3;

    // When the job is started by a JobTrigger we will automatically figure out
    // a valid start_time to avoid scanning files that have not been modified
    // since the last time the JobTrigger executed. This will be based on the
    // time of the execution of the last run of the JobTrigger or the timespan
    // end_time used in the last run of the JobTrigger.
    bool enable_auto_population_of_timespan_config = 4;
  }

  // The storage system to inspect, together with its options.
  oneof type {
    // Google Cloud Datastore options.
    DatastoreOptions datastore_options = 2;

    // Cloud Storage options.
    CloudStorageOptions cloud_storage_options = 3;

    // BigQuery options.
    BigQueryOptions big_query_options = 4;

    // Hybrid inspection options.
    HybridOptions hybrid_options = 9;
  }

  // Configuration of the timespan of the items to include in scanning.
  TimespanConfig timespan_config = 6;
}

// Configuration to control jobs where the content being inspected is outside
// of Google Cloud Platform.
message HybridOptions {
  // A short description of where the data is coming from. Will be stored once
  // in the job. 256 max length.
  string description = 1;

  // These are labels that each inspection request must include within their
  // 'finding_labels' map. Request may contain others, but any missing one of
  // these will be rejected.
  //
  // Label keys must be between 1 and 63 characters long and must conform
  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  //
  // No more than 10 keys can be required.
  repeated string required_finding_label_keys = 2;

  // To organize findings, these labels will be added to each finding.
  //
  // Label keys must be between 1 and 63 characters long and must conform
  // to the following regular expression: `[a-z]([-a-z0-9]*[a-z0-9])?`.
  //
  // Label values must be between 0 and 63 characters long and must conform
  // to the regular expression `([a-z]([-a-z0-9]*[a-z0-9])?)?`.
  //
  // No more than 10 labels can be associated with a given finding.
  //
  // Examples:
  // * `"environment" : "production"`
  // * `"pipeline" : "etl"`
  map<string, string> labels = 3;

  // If the container is a table, additional information to make findings
  // meaningful such as the columns that are primary keys.
  TableOptions table_options = 4;
}

// Row key for identifying a record in BigQuery table.
message BigQueryKey {
  // Complete BigQuery table reference.
  BigQueryTable table_reference = 1;

  // Row number inferred at the time the table was scanned. This value is
  // nondeterministic, cannot be queried, and may be null for inspection
  // jobs. To locate findings within a table, specify
  // `inspect_job.storage_config.big_query_options.identifying_fields` in
  // `CreateDlpJobRequest`.
  int64 row_number = 2;
}

// Record key for a finding in Cloud Datastore.
message DatastoreKey {
  // Datastore entity key.
  Key entity_key = 1;
}

// A unique identifier for a Datastore entity.
// If a key's partition ID or any of its path kinds or names are
// reserved/read-only, the key is reserved/read-only.
// A reserved/read-only key is forbidden in certain documented contexts.
message Key {
  // A (kind, ID/name) pair used to construct a key path.
  //
  // If either name or ID is set, the element is complete.
  // If neither is set, the element is incomplete.
  message PathElement {
    // The kind of the entity.
    // A kind matching regex `__.*__` is reserved/read-only.
    // A kind must not contain more than 1500 bytes when UTF-8 encoded.
    // Cannot be `""`.
    string kind = 1;

    // The type of ID.
    oneof id_type {
      // The auto-allocated ID of the entity.
      // Never equal to zero. Values less than zero are discouraged and may not
      // be supported in the future.
      int64 id = 2;

      // The name of the entity.
      // A name matching regex `__.*__` is reserved/read-only.
      // A name must not be more than 1500 bytes when UTF-8 encoded.
      // Cannot be `""`.
      string name = 3;
    }
  }

  // Entities are partitioned into subsets, currently identified by a project
  // ID and namespace ID.
  // Queries are scoped to a single partition.
  PartitionId partition_id = 1;

  // The entity path.
  // An entity path consists of one or more elements composed of a kind and a
  // string or numerical identifier, which identify entities. The first
  // element identifies a _root entity_, the second element identifies
  // a _child_ of the root entity, the third element identifies a child of the
  // second entity, and so forth. The entities identified by all prefixes of
  // the path are called the element's _ancestors_.
  //
  // A path can never be empty, and a path can have at most 100 elements.
  repeated PathElement path = 2;
}

// Message for a unique key indicating a record that contains a finding.
message RecordKey {
  // The storage-specific key of the record.
  oneof type {
    // Key of the record when the finding is in Cloud Datastore.
    DatastoreKey datastore_key = 2;

    // Key of the record when the finding is in a BigQuery table.
    BigQueryKey big_query_key = 3;
  }

  // Values of identifying columns in the given row. Order of values matches
  // the order of `identifying_fields` specified in the scanning request.
  repeated string id_values = 5;
}

// Message defining the location of a BigQuery table. A table is uniquely
// identified by its project_id, dataset_id, and table_id. Within a query
// a table is often referenced with a string in the format of:
// `<project_id>:<dataset_id>.<table_id>` or
// `<project_id>.<dataset_id>.<table_id>`.
message BigQueryTable {
  // The Google Cloud Platform project ID of the project containing the table.
  // If omitted, project ID is inferred from the API call.
  string project_id = 1;

  // Dataset ID of the table.
  string dataset_id = 2;

  // Name of the table.
  string table_id = 3;
}

// Message defining a field of a BigQuery table.
message BigQueryField {
  // Source table of the field.
  BigQueryTable table = 1;

  // Designated field in the BigQuery table.
  FieldId field = 2;
}

// An entity in a dataset is a field or set of fields that correspond to a
// single person. For example, in medical records the `EntityId` might be a
// patient identifier, or for financial records it might be an account
// identifier. This message is used when generalizations or analysis must take
// into account that multiple rows correspond to the same entity.
message EntityId {
  // Composite key indicating which field contains the entity identifier.
  FieldId field = 1;
}

// Instructions regarding the table content being inspected.
message TableOptions {
  // The columns that are the primary keys for table objects included in
  // ContentItem. A copy of this cell's value will be stored alongside each
  // finding so that the finding can be traced to the specific row it came
  // from. No more than 3 may be provided.
  repeated FieldId identifying_fields = 1;
}