// xref: /aosp_15_r20/external/googleapis/google/cloud/dataplex/v1/metadata.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)

// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.dataplex.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";

option go_package = "cloud.google.com/go/dataplex/apiv1/dataplexpb;dataplexpb";
option java_multiple_files = true;
option java_outer_classname = "MetadataProto";
option java_package = "com.google.cloud.dataplex.v1";

// Metadata service manages metadata resources such as tables, filesets and
// partitions.
service MetadataService {
  option (google.api.default_host) = "dataplex.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Create a metadata entity.
  rpc CreateEntity(CreateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
      body: "entity"
    };
    option (google.api.method_signature) = "parent,entity";
  }

  // Update a metadata entity. Only supports full resource update.
  rpc UpdateEntity(UpdateEntityRequest) returns (Entity) {
    option (google.api.http) = {
      put: "/v1/{entity.name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
      body: "entity"
    };
  }

  // Delete a metadata entity.
  rpc DeleteEntity(DeleteEntityRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // Get a metadata entity.
  rpc GetEntity(GetEntityRequest) returns (Entity) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*}"
    };
    option (google.api.method_signature) = "name";
  }

  // List metadata entities in a zone.
  rpc ListEntities(ListEntitiesRequest) returns (ListEntitiesResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*}/entities"
    };
    option (google.api.method_signature) = "parent";
  }

  // Create a metadata partition.
  rpc CreatePartition(CreatePartitionRequest) returns (Partition) {
    option (google.api.http) = {
      post: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
      body: "partition"
    };
    option (google.api.method_signature) = "parent,partition";
  }

  // Delete a metadata partition.
  rpc DeletePartition(DeletePartitionRequest) returns (google.protobuf.Empty) {
    option (google.api.http) = {
      delete: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // Get a metadata partition of an entity.
  rpc GetPartition(GetPartitionRequest) returns (Partition) {
    option (google.api.http) = {
      get: "/v1/{name=projects/*/locations/*/lakes/*/zones/*/entities/*/partitions/**}"
    };
    option (google.api.method_signature) = "name";
  }

  // List metadata partitions of an entity.
  rpc ListPartitions(ListPartitionsRequest) returns (ListPartitionsResponse) {
    option (google.api.http) = {
      get: "/v1/{parent=projects/*/locations/*/lakes/*/zones/*/entities/*}/partitions"
    };
    option (google.api.method_signature) = "parent";
  }
}

// Create a metadata entity request.
message CreateEntityRequest {
  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Zone" }
  ];

  // Required. Entity resource.
  Entity entity = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Update a metadata entity request.
// The existing entity will be fully replaced by the entity in the request.
// The entity ID is mutable. To modify the ID, use the current entity ID in the
// request URL and specify the new ID in the request body.
message UpdateEntityRequest {
  // Required. The entity to write. It fully replaces the existing entity.
  Entity entity = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 3 [(google.api.field_behavior) = OPTIONAL];
}

// Delete a metadata entity request.
message DeleteEntityRequest {
  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Required. The etag associated with the entity, which can be retrieved with
  // a [GetEntity][google.cloud.dataplex.v1.MetadataService.GetEntity] request.
  string etag = 2 [(google.api.field_behavior) = REQUIRED];
}

// List metadata entities request.
message ListEntitiesRequest {
  // Entity views.
  enum EntityView {
    // The default unset value. Return both table and fileset entities
    // if unspecified.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Only list table entities.
    TABLES = 1;

    // Only list fileset entities.
    FILESETS = 2;
  }

  // Required. The resource name of the parent zone:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Zone" }
  ];

  // Required. Specify the entity view to make a partial list request.
  EntityView view = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Maximum number of entities to return. The service may return
  // fewer than this value. If unspecified, 100 entities will be returned by
  // default. The maximum value is 500; larger values will be truncated to
  // 500.
  int32 page_size = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListEntities` call. Provide
  // this to retrieve the subsequent page. When paginating, all other parameters
  // provided to `ListEntities` must match the call that provided the
  // page token.
  string page_token = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The following filter parameters can be added to the URL to limit
  // the entities returned by the API:
  //
  // - Entity ID: ?filter="id=entityID"
  // - Asset ID: ?filter="asset=assetID"
  // - Data path: ?filter="data_path=gs://my-bucket"
  // - Is HIVE compatible: ?filter="hive_compatible=true"
  // - Is BigQuery compatible: ?filter="bigquery_compatible=true"
  string filter = 5 [(google.api.field_behavior) = OPTIONAL];
}

// List metadata entities response.
message ListEntitiesResponse {
  // Entities in the specified parent zone.
  repeated Entity entities = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Get metadata entity request.
message GetEntityRequest {
  // Entity views for get entity partial result.
  enum EntityView {
    // The API will default to the `BASIC` view.
    ENTITY_VIEW_UNSPECIFIED = 0;

    // Minimal view that does not include the schema.
    BASIC = 1;

    // Include basic information and schema.
    SCHEMA = 2;

    // Include everything. Currently, this is the same as the SCHEMA view.
    FULL = 4;
  }

  // Required. The resource name of the entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Used to select the subset of entity information to return.
  // Defaults to `BASIC`.
  EntityView view = 2 [(google.api.field_behavior) = OPTIONAL];
}

// List metadata partitions request.
message ListPartitionsRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Maximum number of partitions to return. The service may return
  // fewer than this value. If unspecified, 100 partitions will be returned by
  // default. The maximum page size is 500; larger values will be truncated
  // to 500.
  int32 page_size = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Page token received from a previous `ListPartitions` call.
  // Provide this to retrieve the subsequent page. When paginating, all other
  // parameters provided to `ListPartitions` must match the call that provided
  // the page token.
  string page_token = 3 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Filter the partitions returned to the caller using a key value
  // pair expression. Supported operators and syntax:
  //
  // - logic operators: AND, OR
  // - comparison operators: <, >, >=, <=, =, !=
  // - LIKE operators:
  //   - The right hand of a LIKE operator supports "." and
  //     "*" for wildcard searches, for example `value1 LIKE ".*oo.*"`
  // - parenthetical grouping: ( )
  //
  // Sample filter expression: `?filter="key1 < value1 OR key2 > value2"`
  //
  // **Notes:**
  //
  // - Keys to the left of operators are case insensitive.
  // - Partition results are sorted first by creation time, then by
  //   lexicographic order.
  // - Up to 20 key value filter pairs are allowed, but due to performance
  //   considerations, only the first 10 will be used as a filter.
  string filter = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Create metadata partition request.
message CreatePartitionRequest {
  // Required. The resource name of the parent entity:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}`.
  string parent = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Required. Partition resource.
  Partition partition = 3 [(google.api.field_behavior) = REQUIRED];

  // Optional. Only validate the request, but do not perform mutations.
  // The default is false.
  bool validate_only = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Delete metadata partition request.
message DeletePartitionRequest {
  // Required. The resource name of the partition, in the format:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Optional. The etag associated with the partition. Deprecated.
  string etag = 2 [deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// List metadata partitions response.
message ListPartitionsResponse {
  // Partitions under the specified parent entity.
  repeated Partition partitions = 1;

  // Token to retrieve the next page of results, or empty if there are no
  // remaining results in the list.
  string next_page_token = 2;
}

// Get metadata partition request.
message GetPartitionRequest {
  // Required. The resource name of the partition:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{entity_id}/partitions/{partition_value_path}`.
  // The {partition_value_path} segment consists of an ordered sequence of
  // partition values separated by "/". All values must be provided.
  string name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];
}

// Represents tables and fileset metadata contained within a zone.
message Entity {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Entity"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}"
  };

  // The type of entity.
  enum Type {
    // Type unspecified.
    TYPE_UNSPECIFIED = 0;

    // Structured and semi-structured data.
    TABLE = 1;

    // Unstructured data.
    FILESET = 2;
  }

  // Provides compatibility information for various metadata stores.
  message CompatibilityStatus {
    // Provides compatibility information for a specific metadata store.
    message Compatibility {
      // Output only. Whether the entity is compatible and can be represented in
      // the metadata store.
      bool compatible = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

      // Output only. Provides additional detail if the entity is incompatible
      // with the metadata store.
      string reason = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
    }

    // Output only. Whether this entity is compatible with Hive Metastore.
    Compatibility hive_metastore = 1
        [(google.api.field_behavior) = OUTPUT_ONLY];

    // Output only. Whether this entity is compatible with BigQuery.
    Compatibility bigquery = 2 [(google.api.field_behavior) = OUTPUT_ONLY];
  }

  // Output only. The resource name of the entity, of the form:
  // `projects/{project_number}/locations/{location_id}/lakes/{lake_id}/zones/{zone_id}/entities/{id}`.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = { type: "dataplex.googleapis.com/Entity" }
  ];

  // Optional. Display name must be shorter than or equal to 256 characters.
  string display_name = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. User friendly longer description text. Must be shorter than or
  // equal to 1024 characters.
  string description = 3 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The time when the entity was created.
  google.protobuf.Timestamp create_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. The time when the entity was last updated.
  google.protobuf.Timestamp update_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. A user-provided entity ID. It is mutable, and will be used as the
  // published table name. Specifying a new ID in an update entity
  // request will override the existing value.
  // The ID must contain only letters (a-z, A-Z), numbers (0-9), and
  // underscores, and consist of 256 or fewer characters.
  string id = 7 [(google.api.field_behavior) = REQUIRED];

  // Optional. The etag associated with the entity, which can be retrieved with
  // a [GetEntity][google.cloud.dataplex.v1.MetadataService.GetEntity] request.
  // Required for update and delete requests.
  string etag = 8 [(google.api.field_behavior) = OPTIONAL];

  // Required. Immutable. The type of entity.
  Type type = 10 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The ID of the asset associated with the storage
  // location containing the entity data. The entity must be within the same
  // zone as the asset.
  string asset = 11 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The storage path of the entity data.
  // For Cloud Storage data, this is the fully-qualified path to the entity,
  // such as `gs://bucket/path/to/data`. For BigQuery data, this is the name of
  // the table resource, such as
  // `projects/project_id/datasets/dataset_id/tables/table_id`.
  string data_path = 12 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The set of items within the data path constituting the data in
  // the entity, represented as a glob path. Example:
  // `gs://bucket/path/to/data/**/*.csv`.
  string data_path_pattern = 13 [(google.api.field_behavior) = OPTIONAL];

  // Output only. The name of the associated Data Catalog entry.
  string catalog_entry = 14 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. Immutable. Identifies the storage system of the entity data.
  StorageSystem system = 15 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Identifies the storage format of the entity data.
  // It does not apply to entities with data stored in BigQuery.
  StorageFormat format = 16 [(google.api.field_behavior) = REQUIRED];

  // Output only. Metadata stores that the entity is compatible with.
  CompatibilityStatus compatibility = 19
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Identifies the access mechanism to the entity. Not user
  // settable.
  StorageAccess access = 21 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. System generated unique ID for the Entity. This ID will be
  // different if the Entity is deleted and re-created with the same name.
  string uid = 22 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Required. The description of the data structure and layout.
  // The schema is not included in list responses. It is only included in
  // `SCHEMA` and `FULL` entity views of a `GetEntity` response.
  Schema schema = 50 [(google.api.field_behavior) = REQUIRED];
}

// Represents partition metadata contained within entity instances.
message Partition {
  option (google.api.resource) = {
    type: "dataplex.googleapis.com/Partition"
    pattern: "projects/{project}/locations/{location}/lakes/{lake}/zones/{zone}/entities/{entity}/partitions/{partition}"
  };

  // Output only. Partition values used in the HTTP URL must be
  // double encoded. For example, `url_encode(url_encode(value))` can be used
  // to encode "US:CA/CA#Sunnyvale" so that the request URL ends
  // with "/partitions/US%253ACA/CA%2523Sunnyvale".
  // The name field in the response retains the encoded format.
  string name = 1 [
    (google.api.field_behavior) = OUTPUT_ONLY,
    (google.api.resource_reference) = {
      type: "dataplex.googleapis.com/Partition"
    }
  ];

  // Required. Immutable. The set of values representing the partition, which
  // correspond to the partition schema defined in the parent entity.
  repeated string values = 2 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Required. Immutable. The location of the entity data within the partition,
  // for example, `gs://bucket/path/to/entity/key1=value1/key2=value2`, or
  // `projects/<project_id>/datasets/<dataset_id>/tables/<table_id>`.
  string location = 3 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.field_behavior) = IMMUTABLE
  ];

  // Optional. The etag for this partition. Deprecated.
  string etag = 4 [deprecated = true, (google.api.field_behavior) = OPTIONAL];
}

// Schema information describing the structure and layout of the data.
message Schema {
  // Type information for fields in schemas and partition schemas.
  enum Type {
    // SchemaType unspecified.
    TYPE_UNSPECIFIED = 0;

    // Boolean field.
    BOOLEAN = 1;

    // Single byte numeric field.
    BYTE = 2;

    // 16-bit numeric field.
    INT16 = 3;

    // 32-bit numeric field.
    INT32 = 4;

    // 64-bit numeric field.
    INT64 = 5;

    // Floating point numeric field.
    FLOAT = 6;

    // Double precision numeric field.
    DOUBLE = 7;

    // Real value numeric field.
    DECIMAL = 8;

    // Sequence of characters field.
    STRING = 9;

    // Sequence of bytes field.
    BINARY = 10;

    // Date and time field.
    TIMESTAMP = 11;

    // Date field.
    DATE = 12;

    // Time field.
    TIME = 13;

    // Structured field. Nested fields that define the structure of the map.
    // If all nested fields are nullable, this field represents a union.
    RECORD = 14;

    // Null field that does not have values.
    NULL = 100;
  }

  // Additional qualifiers to define field semantics.
  enum Mode {
    // Mode unspecified.
    MODE_UNSPECIFIED = 0;

    // The field has required semantics.
    REQUIRED = 1;

    // The field has optional semantics, and may be null.
    NULLABLE = 2;

    // The field has repeated (0 or more) semantics, and is a list of values.
    REPEATED = 3;
  }

  // Represents a column field within a table schema.
  message SchemaField {
    // Required. The name of the field. Must contain only letters, numbers and
    // underscores, with a maximum length of 767 characters,
    // and must begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Optional. User friendly field description. Must be less than or equal to
    // 1024 characters.
    string description = 2 [(google.api.field_behavior) = OPTIONAL];

    // Required. The type of field.
    Type type = 3 [(google.api.field_behavior) = REQUIRED];

    // Required. Additional field semantics.
    Mode mode = 4 [(google.api.field_behavior) = REQUIRED];

    // Optional. Any nested field for complex types.
    repeated SchemaField fields = 10 [(google.api.field_behavior) = OPTIONAL];
  }

  // Represents a key field within the entity's partition structure. You could
  // have up to 20 partition fields, but only the first 10 partitions have the
  // filtering ability due to performance consideration. **Note:**
  // Partition fields are immutable.
  message PartitionField {
    // Required. Partition field name must consist of letters, numbers, and
    // underscores only, with a maximum length of 256 characters, and must
    // begin with a letter or underscore.
    string name = 1 [(google.api.field_behavior) = REQUIRED];

    // Required. Immutable. The type of field.
    Type type = 2 [
      (google.api.field_behavior) = REQUIRED,
      (google.api.field_behavior) = IMMUTABLE
    ];
  }

  // The structure of paths within the entity, which represent partitions.
  enum PartitionStyle {
    // PartitionStyle unspecified.
    PARTITION_STYLE_UNSPECIFIED = 0;

    // Partitions are hive-compatible.
    // Examples: `gs://bucket/path/to/table/dt=2019-10-31/lang=en`,
    // `gs://bucket/path/to/table/dt=2019-10-31/lang=en/late`.
    HIVE_COMPATIBLE = 1;
  }

  // Required. Set to `true` if user-managed or `false` if managed by Dataplex.
  // The default is `false` (managed by Dataplex).
  //
  // - Set to `false` to enable Dataplex discovery to update the schema,
  //   including new data discovery, schema inference, and schema evolution.
  //   Users retain the ability to input and edit the schema. Dataplex
  //   treats schema input by the user as though produced
  //   by a previous Dataplex discovery operation, and it will
  //   evolve the schema and take action based on that treatment.
  //
  // - Set to `true` to fully manage the entity
  //   schema. This setting guarantees that Dataplex will not
  //   change schema fields.
  bool user_managed = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. The sequence of fields describing data in table entities.
  // **Note:** BigQuery SchemaFields are immutable.
  repeated SchemaField fields = 2 [(google.api.field_behavior) = OPTIONAL];

  // Optional. The sequence of fields describing the partition structure in
  // entities. If this field is empty, there are no partitions within the data.
  repeated PartitionField partition_fields = 3
      [(google.api.field_behavior) = OPTIONAL];

  // Optional. The structure of paths containing partition data within the
  // entity.
  PartitionStyle partition_style = 4 [(google.api.field_behavior) = OPTIONAL];
}

// Describes the format of the data within its storage location.
message StorageFormat {
  // Describes CSV and similar semi-structured data formats.
  message CsvOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII",
    // "UTF-8", and "ISO-8859-1". Defaults to UTF-8 if unspecified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The number of rows to interpret as header rows that should be
    // skipped when reading data rows. Defaults to 0.
    int32 header_rows = 2 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The delimiter used to separate values. Defaults to ','.
    string delimiter = 3 [(google.api.field_behavior) = OPTIONAL];

    // Optional. The character used to quote column values. Accepts '"'
    // (double quotation mark) or ''' (single quotation mark). Defaults to
    // '"' (double quotation mark) if unspecified.
    string quote = 4 [(google.api.field_behavior) = OPTIONAL];
  }

  // Describes JSON data format.
  message JsonOptions {
    // Optional. The character encoding of the data. Accepts "US-ASCII", "UTF-8"
    // and "ISO-8859-1". Defaults to UTF-8 if not specified.
    string encoding = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // Describes Iceberg data format.
  message IcebergOptions {
    // Optional. The location where the Iceberg metadata is present. Must be
    // within the table path.
    string metadata_location = 1 [(google.api.field_behavior) = OPTIONAL];
  }

  // The specific file format of the data.
  enum Format {
    // Format unspecified.
    FORMAT_UNSPECIFIED = 0;

    // Parquet-formatted structured data.
    PARQUET = 1;

    // Avro-formatted structured data.
    AVRO = 2;

    // Orc-formatted structured data.
    ORC = 3;

    // Csv-formatted semi-structured data.
    CSV = 100;

    // Json-formatted semi-structured data.
    JSON = 101;

    // Image data formats (such as jpg and png).
    IMAGE = 200;

    // Audio data formats (such as mp3 and wav).
    AUDIO = 201;

    // Video data formats (such as mp4 and mpg).
    VIDEO = 202;

    // Textual data formats (such as txt and xml).
    TEXT = 203;

    // TensorFlow record format.
    TFRECORD = 204;

    // Data that doesn't match a specific format.
    OTHER = 1000;

    // Data of an unknown format.
    UNKNOWN = 1001;
  }

  // The specific compressed file format of the data.
  enum CompressionFormat {
    // CompressionFormat unspecified. Implies uncompressed data.
    COMPRESSION_FORMAT_UNSPECIFIED = 0;

    // GZip compressed set of files.
    GZIP = 2;

    // BZip2 compressed set of files.
    BZIP2 = 3;
  }

  // Output only. The data format associated with the stored data, which
  // represents content type values. The value is inferred from mime type.
  Format format = 1 [(google.api.field_behavior) = OUTPUT_ONLY];

  // Optional. The compression type associated with the stored data.
  // If unspecified, the data is uncompressed.
  CompressionFormat compression_format = 2
      [(google.api.field_behavior) = OPTIONAL];

  // Required. The mime type descriptor for the data. Must match the pattern
  // {type}/{subtype}. Supported values:
  //
  // - application/x-parquet
  // - application/x-avro
  // - application/x-orc
  // - application/x-tfrecord
  // - application/x-parquet+iceberg
  // - application/x-avro+iceberg
  // - application/x-orc+iceberg
  // - application/json
  // - application/{subtypes}
  // - text/csv
  // - text/{subtypes}
  // - image/{image subtype}
  // - video/{video subtype}
  // - audio/{audio subtype}
  string mime_type = 3 [(google.api.field_behavior) = REQUIRED];

  // Additional format-specific options.
  oneof options {
    // Optional. Additional information about CSV formatted data.
    CsvOptions csv = 10 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Additional information about JSON formatted data.
    JsonOptions json = 11 [(google.api.field_behavior) = OPTIONAL];

    // Optional. Additional information about iceberg tables.
    IcebergOptions iceberg = 12 [(google.api.field_behavior) = OPTIONAL];
  }
}

// Describes the access mechanism of the data within its storage location.
message StorageAccess {
  // Access Mode determines how data stored within the Entity is read.
  enum AccessMode {
    // Access mode unspecified.
    ACCESS_MODE_UNSPECIFIED = 0;

    // Default. Data is accessed directly using storage APIs.
    DIRECT = 1;

    // Data is accessed through a managed interface using BigQuery APIs.
    MANAGED = 2;
  }

  // Output only. Describes the read access mechanism of the data. Not user
  // settable.
  AccessMode read = 21 [(google.api.field_behavior) = OUTPUT_ONLY];
}

// Identifies the cloud system that manages the data storage.
enum StorageSystem {
  // Storage system unspecified.
  STORAGE_SYSTEM_UNSPECIFIED = 0;

  // The entity data is contained within a Cloud Storage bucket.
  CLOUD_STORAGE = 1;

  // The entity data is contained within a BigQuery dataset.
  BIGQUERY = 2;
}
