// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "MetadataProto";
option java_package = "com.google.cloud.dataplex.v1";

// Metadata service manages metadata resources such as tables, filesets and
// partitions.
service MetadataService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Create a metadata entity.
  rpc CreateEntity(CreateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
      body: "entity"
    };
    option (google.api.method_signature) = "parent,entity";
  }

  // Update a metadata entity. Only supports full resource update.
  rpc UpdateEntity(UpdateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      put: "/v1/{entity.name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
      body: "entity"
    };
  }

  // Delete a metadata entity.
  rpc DeleteEntity(DeleteEntityRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Get a metadata entity.
  rpc GetEntity(GetEntityRequest) returns (Entity) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // List metadata entities in a zone.
  rpc ListEntities(ListEntitiesRequest) returns (ListEntitiesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
    };
    option (google.api.method_signature) = "parent";
  }

  // Create a metadata partition.
  rpc CreatePartition(CreatePartitionRequest) returns (Partition) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
      body: "partition"
    };
    option (google.api.method_signature) = "parent,partition";
  }

  // Delete a metadata partition.
  rpc DeletePartition(DeletePartitionRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // Get a metadata partition of an entity.
  rpc GetPartition(GetPartitionRequest) returns (Partition) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // List metadata partitions of an entity.
  rpc ListPartitions(ListPartitionsRequest) returns (ListPartitionsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
    };
    option (google.api.method_signature) = "parent";
  }
}

// Create a metadata entity request.
message CreateEntityRequest {
  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Zone" }
  ];

  // Required. Entity resource.
  Entity entity = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Update a metadata entity request.
// The existing entity will be fully replaced by the entity in the request.
// The entity ID is mutable. To modify the ID, use the current entity ID in the
// request URL and specify the new ID in the request body.
message UpdateEntityRequest {
  // Required. Update description.
  Entity entity = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Delete a metadata entity request.
message DeleteEntityRequest {
  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Required. The etag associated with the entity, which can be retrieved with
  // a [GetEntity][] request.
  string etag = 2 [(google.api.field_behavior) = REQUIRED];
}

// List metadata entities request.
message ListEntitiesRequest {
  // Entity views.
  enum EntityView {
    // The default unset value. Return both table and fileset entities
    // if unspecified.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Only list table entities.
    TABLES = 1;

    // Only list fileset entities.
    FILESETS = 2;
  }

  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Zone" }
  ];

  // Required. Specify the entity view to make a partial list request.
  EntityView view = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Maximum number of entities to return. The service may return
  // fewer than this value. If unspecified, 100 entities will be returned by
  // default. The maximum value is 500; larger values will be truncated to
  // 500.
  int32 page_size = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListEntities` call. Provide
  // this to retrieve the subsequent page. When paginating, all other parameters
  // provided to `ListEntities` must match the call that provided the
  // page token.
  string page_token = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The following filter parameters can be added to the URL to limit
  // the entities returned by the API:
  //
  // - Entity ID: ?filter="id=entityID"
  // - Asset ID: ?filter="asset=assetID"
  // - Data path: ?filter="data_path=gs://my-bucket"
  // - Is HIVE compatible: ?filter="hive_compatible=true"
  // - Is BigQuery compatible: ?filter="bigquery_compatible=true"
  string filter = 5 [(google.api.field_behavior) = OPTIONAL];
}

// List metadata entities response.
message ListEntitiesResponse {
  // Entities in the specified parent zone.
  repeated Entity entities = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Get metadata entity request.
message GetEntityRequest {
  // Entity views for get entity partial result.
  enum EntityView {
    // The API will default to the `BASIC` view.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Minimal view that does not include the schema.
    BASIC = 1;

    // Include basic information and schema.
    SCHEMA = 2;

    // Include everything. Currently, this is the same as the SCHEMA view.
    FULL = 4;
  }

  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Used to select the subset of entity information to return.
  // Defaults to `BASIC`.
  EntityView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List metadata partitions request.
message ListPartitionsRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Maximum number of partitions to return. The service may return
  // fewer than this value. If unspecified, 100 partitions will be returned by
  // default. The maximum page size is 500; larger values will be truncated
  // to 500.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListPartitions` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListPartitions` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter the partitions returned to the caller using a key value
  // pair expression. Supported operators and syntax:
  //
  // - logic operators: AND, OR
  // - comparison operators: <, >, >=, <=, =, !=
  // - LIKE operators:
  //   - The right hand of a LIKE operator supports "." and
  //     "*" for wildcard searches, for example `value1 LIKE ".*oo.*"`
  // - parenthetical grouping: ( )
  //
  // Sample filter expression: `?filter="key1 < value1 OR key2 > value2"`
  //
  // **Notes:**
  //
  // - Keys to the left of operators are case insensitive.
  // - Partition results are sorted first by creation time, then by
  //   lexicographic order.
  // - Up to 20 key value filter pairs are allowed, but due to performance
  //   considerations, only the first 10 will be used as a filter.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Create metadata partition request.
message CreatePartitionRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Required. Partition resource.
  Partition partition = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Delete metadata partition request.
message DeletePartitionRequest {
  // Required. The resource name of the partition.
  // format:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Optional. The etag associated with the partition.
  string etag = 2 [deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// List metadata partitions response.
message ListPartitionsResponse {
  // Partitions under the specified parent entity.
  repeated Partition partitions = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Get metadata partition request.
message GetPartitionRequest {
  // Required. The resource name of the partition:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];
}

// Represents tables and fileset metadata contained within a zone.
message Entity {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Entity"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}"
  };

  // The type of entity.
  enum Type {
    // Type unspecified.
    TYPE_UNSPECIFIED = 0;

    // Structured and semi-structured data.
    TABLE = 1;

    // Unstructured data.
    FILESET = 2;
  }

  // Provides compatibility information for various metadata stores.
  message CompatibilityStatus {
    // Provides compatibility information for a specific metadata store.
    message Compatibility {
      // Output only. Whether the entity is compatible and can be represented in
      // the metadata store.
      bool compatible = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. Provides additional detail if the entity is incompatible
      // with the metadata store.
      string reason = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. Whether this entity is compatible with Hive Metastore.
    Compatibility hive_metastore = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Whether this entity is compatible with BigQuery.
    Compatibility bigquery = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The resource name of the entity, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Display name must be shorter than or equal to 256 characters.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly longer description text. Must be shorter than or
  // equal to 1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The time when the entity was created.
  google.protobuf.Timestamp create_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the entity was last updated.
  google.protobuf.Timestamp update_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. A user-provided entity ID. It is mutable, and will be used as the
  // published table name. Specifying a new ID in an update entity
  // request will override the existing value.
  // The ID must contain only letters (a-z, A-Z), numbers (0-9), and
  // underscores, and consist of 256 or fewer characters.
  string id = 7 [(google.api.field_behavior) = REQUIRED];

  // Optional. The etag associated with the entity, which can be retrieved with
  // a [GetEntity][] request. Required for update and delete requests.
  string etag = 8 [(google.api.field_behavior) = OPTIONAL];

  // Required. Immutable. The type of entity.
  Type type = 10 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The ID of the asset associated with the storage
  // location containing the entity data. The entity must be within the same
  // zone as the asset.
  string asset = 11 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The storage path of the entity data.
  // For Cloud Storage data, this is the fully-qualified path to the entity,
  // such as `gs://bucket/path/to/data`. For BigQuery data, this is the name of
  // the table resource, such as
  // `projects/project_id/datasets/dataset_id/tables/table_id`.
  string data_path = 12 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The set of items within the data path constituting the data in
  // the entity, represented as a glob path. Example:
  // `gs://bucket/path/to/data/**/*.csv`.
  string data_path_pattern = 13 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The name of the associated Data Catalog entry.
  string catalog_entry = 14 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Immutable. Identifies the storage system of the entity data.
  StorageSystem system = 15 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Identifies the storage format of the entity data.
  // It does not apply to entities with data stored in BigQuery.
  StorageFormat format = 16 [(google.api.field_behavior) = REQUIRED];

  // Output only. Metadata stores that the entity is compatible with.
  CompatibilityStatus compatibility = 19
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Identifies the access mechanism to the entity. Not user
  // settable.
  StorageAccess access = 21 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated unique ID for the Entity. This ID will be
  // different if the Entity is deleted and re-created with the same name.
  string uid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The description of the data structure and layout.
  // The schema is not included in list responses. It is only included in
  // `SCHEMA` and `FULL` entity views of a `GetEntity` response.
  Schema schema = 50 [(google.api.field_behavior) = REQUIRED];
}

// Represents partition metadata contained within entity instances.
message Partition {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Partition"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}/partitions/{partition}"
  };

  // Output only. Partition values used in the HTTP URL must be
  // double encoded. For example, `url_encode(url_encode(value))` can be used
  // to encode "US:CA/CA#Sunnyvale" so that the request URL ends
  // with "/partitions/US%253ACA/CA%2523Sunnyvale".
  // The name field in the response retains the encoded format.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Required. Immutable. The set of values representing the partition, which
  // correspond to the partition schema defined in the parent entity.
  repeated string values = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The location of the entity data within the partition,
  // for example, `gs://bucket/path/to/entity/key1=value1/key2=value2`. Or
  // `projects/<project_id>/datasets/<dataset_id>/tables/<table_id>`.
  string location = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The etag for this partition.
  string etag = 4 [deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// Schema information describing the structure and layout of the data.
message Schema {
  // Type information for fields in schemas and partition schemas.
  enum Type {
    // SchemaType unspecified.
    TYPE_UNSPECIFIED = 0;

    // Boolean field.
    BOOLEAN = 1;

    // Single byte numeric field.
    BYTE = 2;

    // 16-bit numeric field.
    INT16 = 3;

    // 32-bit numeric field.
    INT32 = 4;

    // 64-bit numeric field.
    INT64 = 5;

    // Floating point numeric field.
    FLOAT = 6;

    // Double precision numeric field.
    DOUBLE = 7;

    // Real value numeric field.
    DECIMAL = 8;

    // Sequence of characters field.
    STRING = 9;

    // Sequence of bytes field.
    BINARY = 10;

    // Date and time field.
    TIMESTAMP = 11;

    // Date field.
    DATE = 12;

    // Time field.
    TIME = 13;

    // Structured field. Nested fields that define the structure of the map.
    // If all nested fields are nullable, this field represents a union.
    RECORD = 14;

    // Null field that does not have values.
    NULL = 100;
  }

  // Additional qualifiers to define field semantics.
  enum Mode {
    // Mode unspecified.
    MODE_UNSPECIFIED = 0;

    // The field has required semantics.
    REQUIRED = 1;

    // The field has optional semantics, and may be null.
    NULLABLE = 2;

    // The field has repeated (0 or more) semantics, and is a list of values.
    REPEATED = 3;
  }

  // Represents a column field within a table schema.
  message SchemaField {
    // Required. The name of the field. Must contain only letters, numbers and
    // underscores, with a maximum length of 767 characters,
    // and must begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. User friendly field description. Must be less than or equal to
    // 1024 characters.
    string description = 2 [(google.api.field_behavior) = OPTIONAL];

    // Required. The type of field.
    Type type = 3 [(google.api.field_behavior) = REQUIRED];

    // Required. Additional field semantics.
    Mode mode = 4 [(google.api.field_behavior) = REQUIRED];

    // Optional. Any nested field for complex types.
    repeated SchemaField fields = 10 [(google.api.field_behavior) = OPTIONAL];
  }

  // Represents a key field within the entity's partition structure. You could
  // have up to 20 partition fields, but only the first 10 partitions have the
  // filtering ability due to performance consideration. **Note:**
  // Partition fields are immutable.
  message PartitionField {
    // Required. Partition field name must consist of letters, numbers, and
    // underscores only, with a maximum length of 256 characters, and must
    // begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Immutable. The type of field.
    Type type = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];
  }

  // The structure of paths within the entity, which represent partitions.
  enum PartitionStyle {
    // PartitionStyle unspecified.
    PARTITION_STYLE_UNSPECIFIED = 0;

    // Partitions are hive-compatible.
    // Examples: `gs://bucket/path/to/table/dt=2019-10-31/lang=en`,
    // `gs://bucket/path/to/table/dt=2019-10-31/lang=en/late`.
    HIVE_COMPATIBLE = 1;
  }

  // Required. Set to `true` if user-managed or `false` if managed by Dataplex.
  // The default is `false` (managed by Dataplex).
  //
  // - Set to `false` to enable Dataplex discovery to update the schema,
  //   including new data discovery, schema inference, and schema evolution.
  //   Users retain the ability to input and edit the schema. Dataplex
  //   treats schema input by the user as though produced
  //   by a previous Dataplex discovery operation, and it will
  //   evolve the schema and take action based on that treatment.
  //
  // - Set to `true` to fully manage the entity
  //   schema. This setting guarantees that Dataplex will not
  //   change schema fields.
  bool user_managed = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The sequence of fields describing data in table entities.
  // **Note:** BigQuery SchemaFields are immutable.
  repeated SchemaField fields = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The sequence of fields describing the partition structure in
  // entities. If this field is empty, there are no partitions within the data.
  repeated PartitionField partition_fields = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The structure of paths containing partition data within the
  // entity.
  PartitionStyle partition_style = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Describes the format of the data within its storage location.
message StorageFormat {
  // Describes CSV and similar semi-structured data formats.
  message CsvOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII",
    // "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of rows to interpret as header rows that should be
    // skipped when reading data rows. Defaults to 0.
    int32 header_rows = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The delimiter used to separate values. Defaults to ','.
    string delimiter = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The character used to quote column values. Accepts '"'
    // (double quotation mark) or ''' (single quotation mark). Defaults to
    // '"' (double quotation mark) if unspecified.
    string quote = 4 [(google.api.field_behavior) = OPTIONAL];
  }

  // Describes JSON data format.
  message JsonOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII", "UTF-8"
    // and "ISO-8859-1". Defaults to UTF-8 if not specified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Describes Iceberg data format.
  message IcebergOptions {
    // Optional. The location of where the iceberg metadata is present, must be
    // within the table path.
    string metadata_location = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // The specific file format of the data.
  enum Format {
    // Format unspecified.
    FORMAT_UNSPECIFIED = 0;

    // Parquet-formatted structured data.
    PARQUET = 1;

    // Avro-formatted structured data.
    AVRO = 2;

    // Orc-formatted structured data.
    ORC = 3;

    // Csv-formatted semi-structured data.
    CSV = 100;

    // Json-formatted semi-structured data.
    JSON = 101;

    // Image data formats (such as jpg and png).
    IMAGE = 200;

    // Audio data formats (such as mp3, and wav).
    AUDIO = 201;

    // Video data formats (such as mp4 and mpg).
    VIDEO = 202;

    // Textual data formats (such as txt and xml).
    TEXT = 203;

    // TensorFlow record format.
    TFRECORD = 204;

    // Data that doesn't match a specific format.
    OTHER = 1000;

    // Data of an unknown format.
    UNKNOWN = 1001;
  }

  // The specific compressed file format of the data.
  enum CompressionFormat {
    // CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0;

    // GZip compressed set of files.
    GZIP = 2;

    // BZip2 compressed set of files.
    BZIP2 = 3;
  }

  // Output only. The data format associated with the stored data, which
  // represents content type values. The value is inferred from mime type.
  Format format = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The compression type associated with the stored data.
  // If unspecified, the data is uncompressed.
  CompressionFormat compression_format = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Required. The mime type descriptor for the data. Must match the pattern
  // {type}/{subtype}. Supported values:
  //
  // - application/x-parquet
  // - application/x-avro
  // - application/x-orc
  // - application/x-tfrecord
  // - application/x-parquet+iceberg
  // - application/x-avro+iceberg
  // - application/x-orc+iceberg
  // - application/json
  // - application/{subtypes}
  // - text/csv
  // - text/<subtypes>
  // - image/{image subtype}
  // - video/{video subtype}
  // - audio/{audio subtype}
  string mime_type = 3 [(google.api.field_behavior) = REQUIRED];

  // Additional format-specific options.
  oneof options {
    // Optional. Additional information about CSV formatted data.
    CsvOptions csv = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Additional information about JSON formatted data.
    JsonOptions json = 11 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Additional information about iceberg tables.
    IcebergOptions iceberg = 12 [(google.api.field_behavior) = OPTIONAL];
  }
}

// Describes the access mechanism of the data within its storage location.
message StorageAccess {
  // Access Mode determines how data stored within the Entity is read.
  enum AccessMode {
    // Access mode unspecified.
    ACCESS_MODE_UNSPECIFIED = 0;

    // Default. Data is accessed directly using storage APIs.
    DIRECT = 1;

    // Data is accessed through a managed interface using BigQuery APIs.
    MANAGED = 2;
  }

  // Output only. Describes the read access mechanism of the data. Not user
  // settable.
  AccessMode read = 21 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Identifies the cloud system that manages the data storage.
enum StorageSystem {
  // Storage system unspecified.
  STORAGE_SYSTEM_UNSPECIFIED = 0;

  // The entity data is contained within a Cloud Storage bucket.
  CLOUD_STORAGE = 1;

  // The entity data is contained within a BigQuery dataset.
  BIGQUERY = 2;
}