// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "LogsProto";
option java_package = "com.google.cloud.dataplex.v1";

// The payload associated with Discovery data processing.
message DiscoveryEvent {
  // The type of the event.
  enum EventType {
    // An unspecified event type.
    EVENT_TYPE_UNSPECIFIED = 0;

    // An event representing discovery configuration in effect.
    CONFIG = 1;

    // An event representing a metadata entity being created.
    ENTITY_CREATED = 2;

    // An event representing a metadata entity being updated.
    ENTITY_UPDATED = 3;

    // An event representing a metadata entity being deleted.
    ENTITY_DELETED = 4;

    // An event representing a partition being created.
    PARTITION_CREATED = 5;

    // An event representing a partition being updated.
    PARTITION_UPDATED = 6;

    // An event representing a partition being deleted.
    PARTITION_DELETED = 7;
  }

  // The type of the entity.
  enum EntityType {
    // An unspecified entity type.
    ENTITY_TYPE_UNSPECIFIED = 0;

    // Entities representing structured data.
    TABLE = 1;

    // Entities representing unstructured data.
    FILESET = 2;
  }

  // Details about configuration events.
  message ConfigDetails {
    // A list of discovery configuration parameters in effect.
    // The keys are the field paths within DiscoverySpec.
    // Eg. includePatterns, excludePatterns, csvOptions.disableTypeInference,
    // etc.
    map<string, string> parameters = 1;
  }

  // Details about the entity.
  message EntityDetails {
    // The name of the entity resource.
    // The name is the fully-qualified resource name.
    string entity = 1;

    // The type of the entity resource.
    EntityType type = 2;
  }

  // Details about the partition.
  message PartitionDetails {
    // The name of the partition resource.
    // The name is the fully-qualified resource name.
    string partition = 1;

    // The name of the containing entity resource.
    // The name is the fully-qualified resource name.
    string entity = 2;

    // The type of the containing entity resource.
    EntityType type = 3;

    // The locations of the data items (e.g., Cloud Storage objects) sampled
    // for metadata inference.
    repeated string sampled_data_locations = 4;
  }

  // Details about the action.
  message ActionDetails {
    // The type of action.
    // Eg. IncompatibleDataSchema, InvalidDataFormat
    string type = 1;
  }

  // The log message.
  string message = 1;

  // The id of the associated lake.
  string lake_id = 2;

  // The id of the associated zone.
  string zone_id = 3;

  // The id of the associated asset.
  string asset_id = 4;

  // The data location associated with the event.
  string data_location = 5;

  // The type of the event being logged.
  EventType type = 10;

  // Additional details about the event.
  oneof details {
    // Details about discovery configuration in effect.
    ConfigDetails config = 20;

    // Details about the entity associated with the event.
    EntityDetails entity = 21;

    // Details about the partition associated with the event.
    PartitionDetails partition = 22;

    // Details about the action associated with the event.
    ActionDetails action = 23;
  }
}

// The payload associated with Job logs that contains events describing jobs
// that have run within a Lake.
message JobEvent {
  // The type of the job.
  enum Type {
    // Unspecified job type.
    TYPE_UNSPECIFIED = 0;

    // Spark jobs.
    SPARK = 1;

    // Notebook jobs.
    NOTEBOOK = 2;
  }

  // The completion status of the job.
  enum State {
    // Unspecified job state.
    STATE_UNSPECIFIED = 0;

    // Job successfully completed.
    SUCCEEDED = 1;

    // Job was unsuccessful.
    FAILED = 2;

    // Job was cancelled by the user.
    CANCELLED = 3;

    // Job was cancelled or aborted via the service executing the job.
    ABORTED = 4;
  }

  // The service used to execute the job.
  enum Service {
    // Unspecified service.
    SERVICE_UNSPECIFIED = 0;

    // Cloud Dataproc.
    DATAPROC = 1;
  }

  // Job Execution trigger.
  enum ExecutionTrigger {
    // The job execution trigger is unspecified.
    EXECUTION_TRIGGER_UNSPECIFIED = 0;

    // The job was triggered by Dataplex based on trigger spec from task
    // definition.
    TASK_CONFIG = 1;

    // The job was triggered by the explicit call of Task API.
    RUN_REQUEST = 2;
  }

  // The log message.
  string message = 1;

  // The unique id identifying the job.
  string job_id = 2;

  // The time when the job started running.
  google.protobuf.Timestamp start_time = 3;

  // The time when the job ended running.
  google.protobuf.Timestamp end_time = 4;

  // The job state on completion.
  State state = 5;

  // The number of retries.
  int32 retries = 6;

  // The type of the job.
  Type type = 7;

  // The service used to execute the job.
  Service service = 8;

  // The reference to the job within the service.
  string service_job = 9;

  // Job execution trigger.
  ExecutionTrigger execution_trigger = 11;
}

// These messages contain information about sessions within an environment.
// The monitored resource is 'Environment'.
message SessionEvent {
  // The type of the event.
  enum EventType {
    // An unspecified event type.
    EVENT_TYPE_UNSPECIFIED = 0;

    // Event when the session is assigned to a user.
    START = 1;

    // Event for stop of a session.
    STOP = 2;

    // Query events in the session.
    QUERY = 3;

    // Event for creation of a cluster. It is not yet assigned to a user.
    // This comes before START in the sequence
    CREATE = 4;
  }

  // Execution details of the query.
  message QueryDetail {
    // Query Execution engine.
    enum Engine {
      // An unspecified Engine type.
      ENGINE_UNSPECIFIED = 0;

      // Spark-sql engine is specified in Query.
      SPARK_SQL = 1;

      // BigQuery engine is specified in Query.
      BIGQUERY = 2;
    }

    // The unique Query id identifying the query.
    string query_id = 1;

    // The query text executed.
    string query_text = 2;

    // Query Execution engine.
    Engine engine = 3;

    // Time taken for execution of the query.
    google.protobuf.Duration duration = 4;

    // The size of results the query produced.
    int64 result_size_bytes = 5;

    // The data processed by the query.
    int64 data_processed_bytes = 6;
  }

  // The log message.
  string message = 1;

  // The information about the user that created the session. It will be the
  // email address of the user.
  string user_id = 2;

  // Unique identifier for the session.
  string session_id = 3;

  // The type of the event.
  EventType type = 4;

  // Additional information about the Query metadata.
  oneof detail {
    // The execution details of the query.
    QueryDetail query = 5;
  }

  // The status of the event.
  bool event_succeeded = 6;

  // If the session is associated with an environment with fast startup
  // enabled, and was created before being assigned to a user.
  bool fast_startup_enabled = 7;

  // The idle duration of a warm pooled session before it is assigned to user.
  google.protobuf.Duration unassigned_duration = 8;
}

// Payload associated with Governance related log events.
message GovernanceEvent {
  // Information about Entity resource that the log event is associated with.
  message Entity {
    // Type of entity.
    enum EntityType {
      // An unspecified Entity type.
      ENTITY_TYPE_UNSPECIFIED = 0;

      // Table entity type.
      TABLE = 1;

      // Fileset entity type.
      FILESET = 2;
    }

    // The Entity resource the log event is associated with.
    // Format:
    // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`
    string entity = 1 [(google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Entity"
    }];

    // Type of entity.
    EntityType entity_type = 2;
  }

  // Type of governance log event.
  enum EventType {
    // An unspecified event type.
    EVENT_TYPE_UNSPECIFIED = 0;

    // Resource IAM policy update event.
    RESOURCE_IAM_POLICY_UPDATE = 1;

    // BigQuery table create event.
    BIGQUERY_TABLE_CREATE = 2;

    // BigQuery table update event.
    BIGQUERY_TABLE_UPDATE = 3;

    // BigQuery table delete event.
    BIGQUERY_TABLE_DELETE = 4;

    // BigQuery connection create event.
    BIGQUERY_CONNECTION_CREATE = 5;

    // BigQuery connection update event.
    BIGQUERY_CONNECTION_UPDATE = 6;

    // BigQuery connection delete event.
    BIGQUERY_CONNECTION_DELETE = 7;

    // BigQuery taxonomy created.
    BIGQUERY_TAXONOMY_CREATE = 10;

    // BigQuery policy tag created.
    BIGQUERY_POLICY_TAG_CREATE = 11;

    // BigQuery policy tag deleted.
    BIGQUERY_POLICY_TAG_DELETE = 12;

    // BigQuery set iam policy for policy tag.
    BIGQUERY_POLICY_TAG_SET_IAM_POLICY = 13;

    // Access policy update event.
    ACCESS_POLICY_UPDATE = 14;

    // Number of resources matched with particular Query.
    GOVERNANCE_RULE_MATCHED_RESOURCES = 15;

    // Rule processing exceeds the allowed limit.
    GOVERNANCE_RULE_SEARCH_LIMIT_EXCEEDS = 16;

    // Rule processing errors.
    GOVERNANCE_RULE_ERRORS = 17;

    // Governance rule processing Event.
    GOVERNANCE_RULE_PROCESSING = 18;
  }

  // The log message.
  string message = 1;

  // The type of the event.
  EventType event_type = 2;

  // Entity resource information if the log event is associated with a
  // specific entity.
  optional Entity entity = 3;
}

// These messages contain information about the execution of a datascan.
// The monitored resource is 'DataScan'
// Next ID: 13
message DataScanEvent {
  // The type of the data scan.
  enum ScanType {
    // An unspecified data scan type.
    SCAN_TYPE_UNSPECIFIED = 0;

    // Data scan for data profile.
    DATA_PROFILE = 1;

    // Data scan for data quality.
    DATA_QUALITY = 2;
  }

  // The job state of the data scan.
  enum State {
    // Unspecified job state.
    STATE_UNSPECIFIED = 0;

    // Data scan job started.
    STARTED = 1;

    // Data scan job successfully completed.
    SUCCEEDED = 2;

    // Data scan job was unsuccessful.
    FAILED = 3;

    // Data scan job was cancelled.
    CANCELLED = 4;

    // Data scan job was created.
    CREATED = 5;
  }

  // The trigger type for the data scan.
  enum Trigger {
    // An unspecified trigger type.
    TRIGGER_UNSPECIFIED = 0;

    // Data scan triggers on demand.
    ON_DEMAND = 1;

    // Data scan triggers as per schedule.
    SCHEDULE = 2;
  }

  // The scope of job for the data scan.
  enum Scope {
    // An unspecified scope type.
    SCOPE_UNSPECIFIED = 0;

    // Data scan runs on all of the data.
    FULL = 1;

    // Data scan runs on incremental data.
    INCREMENTAL = 2;
  }

  // Data profile result for data scan job.
  message DataProfileResult {
    // The count of rows processed in the data scan job.
    int64 row_count = 1;
  }

  // Data quality result for data scan job.
  message DataQualityResult {
    // The count of rows processed in the data scan job.
    int64 row_count = 1;

    // Whether the data quality result was `pass` or not.
    bool passed = 2;

    // The result of each dimension for data quality result.
    // The key of the map is the name of the dimension.
    // The value is the bool value depicting whether the dimension result was
    // `pass` or not.
    map<string, bool> dimension_passed = 3;

    // The table-level data quality score for the data scan job.
    //
    // The data quality score ranges between [0, 100] (up to two decimal
    // points).
    float score = 4;

    // The score of each dimension for data quality result.
    // The key of the map is the name of the dimension.
    // The value is the data quality score for the dimension.
    //
    // The score ranges between [0, 100] (up to two decimal
    // points).
    map<string, float> dimension_score = 5;

    // The score of each column scanned in the data scan job.
    // The key of the map is the name of the column.
    // The value is the data quality score for the column.
    //
    // The score ranges between [0, 100] (up to two decimal
    // points).
    map<string, float> column_score = 6;
  }

  // Applied configs for data profile type data scan job.
  message DataProfileAppliedConfigs {
    // The percentage of the records selected from the dataset for DataScan.
    //
    // * Value ranges between 0.0 and 100.0.
    // * Value 0.0 or 100.0 imply that sampling was not applied.
    float sampling_percent = 1;

    // Boolean indicating whether a row filter was applied in the DataScan job.
    bool row_filter_applied = 2;

    // Boolean indicating whether a column filter was applied in the DataScan
    // job.
    bool column_filter_applied = 3;
  }

  // Applied configs for data quality type data scan job.
  message DataQualityAppliedConfigs {
    // The percentage of the records selected from the dataset for DataScan.
    //
    // * Value ranges between 0.0 and 100.0.
    // * Value 0.0 or 100.0 imply that sampling was not applied.
    float sampling_percent = 1;

    // Boolean indicating whether a row filter was applied in the DataScan job.
    bool row_filter_applied = 2;
  }

  // Post scan actions result for data scan job.
  message PostScanActionsResult {
    // The result of BigQuery export post scan action.
    message BigQueryExportResult {
      // Execution state for the exporting.
      enum State {
        // The exporting state is unspecified.
        STATE_UNSPECIFIED = 0;

        // The exporting completed successfully.
        SUCCEEDED = 1;

        // The exporting is no longer running due to an error.
        FAILED = 2;

        // The exporting is skipped due to no valid scan result to export
        // (usually caused by scan failed).
        SKIPPED = 3;
      }

      // Execution state for the BigQuery exporting.
      State state = 1;

      // Additional information about the BigQuery exporting.
      string message = 2;
    }

    // The result of BigQuery export post scan action.
    BigQueryExportResult bigquery_export_result = 1;
  }

  // The data source of the data scan
  string data_source = 1;

  // The identifier of the specific data scan job this log entry is for.
  string job_id = 2;

  // The time when the data scan job was created.
  google.protobuf.Timestamp create_time = 12;

  // The time when the data scan job started to run.
  google.protobuf.Timestamp start_time = 3;

  // The time when the data scan job finished.
  google.protobuf.Timestamp end_time = 4;

  // The type of the data scan.
  ScanType type = 5;

  // The status of the data scan job.
  State state = 6;

  // The message describing the data scan job event.
  string message = 7;

  // A version identifier of the spec which was used to execute this job.
  string spec_version = 8;

  // The trigger type of the data scan job.
  Trigger trigger = 9;

  // The scope of the data scan (e.g. full, incremental).
  Scope scope = 10;

  // The result of the data scan job.
  oneof result {
    // Data profile result for data profile type data scan.
    DataProfileResult data_profile = 101;

    // Data quality result for data quality type data scan.
    DataQualityResult data_quality = 102;
  }

  // The applied configs in the data scan job.
  // NOTE(review): `appliedConfigs` is not lower_snake_case, but the name is
  // published API surface (it affects generated code) and must not change.
  oneof appliedConfigs {
    // Applied configs for data profile type data scan.
    DataProfileAppliedConfigs data_profile_configs = 201;

    // Applied configs for data quality type data scan.
    DataQualityAppliedConfigs data_quality_configs = 202;
  }

  // The result of post scan actions.
  PostScanActionsResult post_scan_actions_result = 11;
}

// Information about the result of a data quality rule for data quality scan.
// The monitored resource is 'DataScan'.
message DataQualityScanRuleResult {
  // The type of the data quality rule.
  enum RuleType {
    // An unspecified rule type.
    RULE_TYPE_UNSPECIFIED = 0;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#nonnullexpectation.
    NON_NULL_EXPECTATION = 1;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#rangeexpectation.
    RANGE_EXPECTATION = 2;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#regexexpectation.
    REGEX_EXPECTATION = 3;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#rowconditionexpectation.
    ROW_CONDITION_EXPECTATION = 4;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#setexpectation.
    SET_EXPECTATION = 5;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#statisticrangeexpectation.
    STATISTIC_RANGE_EXPECTATION = 6;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#tableconditionexpectation.
    TABLE_CONDITION_EXPECTATION = 7;

    // Please see
    // https://cloud.google.com/dataplex/docs/reference/rest/v1/DataQualityRule#uniquenessexpectation.
    UNIQUENESS_EXPECTATION = 8;
  }

  // The evaluation type of the data quality rule.
  enum EvaluationType {
    // An unspecified evaluation type.
    EVALUATION_TYPE_UNSPECIFIED = 0;

    // The rule evaluation is done at per row level.
    PER_ROW = 1;

    // The rule evaluation is done for an aggregate of rows.
    AGGREGATE = 2;
  }

  // Whether the data quality rule passed or failed.
  enum Result {
    // An unspecified result.
    RESULT_UNSPECIFIED = 0;

    // The data quality rule passed.
    PASSED = 1;

    // The data quality rule failed.
    FAILED = 2;
  }

  // Identifier of the specific data scan job this log entry is for.
  string job_id = 1;

  // The data source of the data scan (e.g. BigQuery table name).
  string data_source = 2;

  // The column which this rule is evaluated against.
  string column = 3;

  // The name of the data quality rule.
  string rule_name = 4;

  // The type of the data quality rule.
  RuleType rule_type = 5;

  // The evaluation type of the data quality rule.
  // NOTE(review): field name misspells "evaluation", but it is published API
  // surface (wire/JSON name) and must not be renamed.
  EvaluationType evalution_type = 6;

  // The dimension of the data quality rule.
  string rule_dimension = 7;

  // The passing threshold ([0.0, 100.0]) of the data quality rule.
  double threshold_percent = 8;

  // The result of the data quality rule.
  Result result = 9;

  // The number of rows evaluated against the data quality rule.
  // This field is only valid for rules of PER_ROW evaluation type.
  int64 evaluated_row_count = 10;

  // The number of rows which passed a rule evaluation.
  // This field is only valid for rules of PER_ROW evaluation type.
  int64 passed_row_count = 11;

  // The number of rows with null values in the specified column.
  int64 null_row_count = 12;
}