// xref: /aosp_15_r20/external/googleapis/google/cloud/datalabeling/v1beta1/dataset.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/api/resource.proto";
import "google/cloud/datalabeling/v1beta1/annotation.proto";
import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
import "google/cloud/datalabeling/v1beta1/data_payloads.proto";
import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
import "google/protobuf/timestamp.proto";

// Per-language code generation settings. These are part of the published API
// surface; changing them moves or renames generated code for existing users.
option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
option go_package = "cloud.google.com/go/datalabeling/apiv1beta1/datalabelingpb;datalabelingpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
option ruby_package = "Google::Cloud::DataLabeling::V1beta1";

// The type of data being imported. Referenced by InputConfig.data_type.
//
// NOTE(review): values 3 and 5 are not assigned in this enum — presumably
// retired or never published; confirm before assigning them to new values.
enum DataType {
  // No data type was given. This is the proto3 default (zero) value.
  DATA_TYPE_UNSPECIFIED = 0;

  // Image data. Allowed for continuous evaluation.
  IMAGE = 1;

  // Video data.
  VIDEO = 2;

  // Text data. Allowed for continuous evaluation.
  TEXT = 4;

  // General data. Allowed for continuous evaluation.
  GENERAL_DATA = 6;
}

// Dataset is the resource to hold your data. You can request multiple labeling
// tasks for a dataset while each one will generate an AnnotatedDataset.
message Dataset {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/Dataset"
    pattern: "projects/{project}/datasets/{dataset}"
  };

  // Output only. Dataset resource name, format is:
  // projects/{project_id}/datasets/{dataset_id}
  string name = 1;

  // Required. The display name of the dataset. Maximum of 64 characters.
  string display_name = 2;

  // Optional. User-provided description of the dataset.
  // The description can be up to 10000 characters long.
  string description = 3;

  // Output only. Time the dataset was created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. The original input configs that ImportData was called
  // with. Populated only after clients have imported data into this dataset.
  repeated InputConfig input_configs = 5;

  // Output only. The names of any related resources that are blocking changes
  // to the dataset.
  repeated string blocking_resources = 6;

  // Output only. The number of data items in the dataset.
  int64 data_item_count = 7;
}

// The configuration of input data, including data type, location, etc.
message InputConfig {
  // Optional. The metadata associated with each data type.
  oneof data_type_metadata {
    // Required for text import, as language code must be specified.
    TextMetadata text_metadata = 6;
  }

  // Required. Where the data is from.
  oneof source {
    // Source located in Cloud Storage.
    GcsSource gcs_source = 2;

    // Source located in BigQuery. You must specify this field if you are using
    // this InputConfig in an
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
    BigQuerySource bigquery_source = 5;
  }

  // Required. Data type must be specified when user tries to import data.
  DataType data_type = 1;

  // Optional. The type of annotation to be performed on this data. You must
  // specify this field if you are using this InputConfig in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
  AnnotationType annotation_type = 3;

  // Optional. Metadata about annotations for the input. You must specify this
  // field if you are using this InputConfig in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob] for a
  // model version that performs classification.
  ClassificationMetadata classification_metadata = 4;
}

// Metadata for the text.
message TextMetadata {
  // The language of this text, as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
  // Default value is en-US.
  string language_code = 1;
}

// Metadata for classification annotations.
message ClassificationMetadata {
  // Whether the classification task is multi-label or not. Being a proto3
  // bool, an unset value reads as false.
  bool is_multi_label = 1;
}

// Source of the Cloud Storage file to be imported.
message GcsSource {
  // Required. The input URI of the source file. This must be a Cloud Storage
  // path (`gs://...`).
  string input_uri = 1;

  // Required. The format of the source file. Only "text/csv" is supported.
  string mime_type = 2;
}

// The BigQuery location for input data. If used in an
// [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob], this
// is where the service saves the prediction input and output sampled from the
// model version.
message BigQuerySource {
  // Required. BigQuery URI to a table, up to 2,000 characters long. If you
  // specify the URI of a table that does not exist, Data Labeling Service
  // creates a table at the URI with the correct schema when you create your
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
  // If you specify the URI of a table that already exists, it must have the
  // [correct
  // schema](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
  //
  // Provide the table URI in the following format:
  //
  // "bq://<var>{your_project_id}</var>/<var>{your_dataset_name}</var>/<var>{your_table_name}</var>"
  //
  // [Learn
  // more](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
  string input_uri = 1;
}

// The configuration of output data.
message OutputConfig {
  // Required. Location to output data to.
  oneof destination {
    // Output to a file in Cloud Storage. Should be used for labeling output
    // other than image segmentation.
    GcsDestination gcs_destination = 1;

    // Output to a folder in Cloud Storage. Should be used for image
    // segmentation labeling output.
    GcsFolderDestination gcs_folder_destination = 2;
  }
}

// Export destination of the data. Only a Cloud Storage (gcs) path is allowed
// in output_uri.
message GcsDestination {
  // Required. The output URI of the destination file.
  string output_uri = 1;

  // Required. The format of the gcs destination. Only "text/csv" and
  // "application/json" are supported.
  string mime_type = 2;
}

// Export folder destination of the data.
message GcsFolderDestination {
  // Required. Cloud Storage directory to export data to.
  string output_folder_uri = 1;
}

// DataItem is a piece of data, without annotation. For example, an image.
message DataItem {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/DataItem"
    pattern: "projects/{project}/datasets/{dataset}/dataItems/{data_item}"
  };

  // Output only. The data content of this item. As a oneof, at most one of
  // the payload fields below is set.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of text content.
    TextPayload text_payload = 3;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 4;
  }

  // Output only. Name of the data item, in format of:
  // projects/{project_id}/datasets/{dataset_id}/dataItems/{data_item_id}
  string name = 1;
}

// AnnotatedDataset is a set holding annotations for data in a Dataset. Each
// labeling task will generate an AnnotatedDataset under the Dataset that the
// task is requested for.
message AnnotatedDataset {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/AnnotatedDataset"
    pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}"
  };

  // Output only. AnnotatedDataset resource name in format of:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}
  string name = 1;

  // Output only. The display name of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when user starts a labeling task. Maximum of 64
  // characters.
  string display_name = 2;

  // Output only. The description of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when user starts a labeling task. Maximum of 10000
  // characters.
  string description = 9;

  // Output only. Source of the annotation.
  AnnotationSource annotation_source = 3;

  // Output only. Type of the annotation. It is specified when starting the
  // labeling task.
  AnnotationType annotation_type = 8;

  // Output only. Number of examples in the annotated dataset.
  int64 example_count = 4;

  // Output only. Number of examples that have annotation in the annotated
  // dataset.
  int64 completed_example_count = 5;

  // Output only. Per label statistics.
  LabelStats label_stats = 6;

  // Output only. Time the AnnotatedDataset was created.
  google.protobuf.Timestamp create_time = 7;

  // Output only. Additional information about AnnotatedDataset.
  AnnotatedDatasetMetadata metadata = 10;

  // Output only. The names of any related resources that are blocking changes
  // to the annotated dataset.
  repeated string blocking_resources = 11;
}

// Statistics about annotation specs.
message LabelStats {
  // Map of each annotation spec's example count. Key is the annotation spec
  // name and value is the number of examples for that annotation spec.
  // If the annotated dataset does not have an annotation spec, the map will
  // return a pair where the key is the empty string and the value is the total
  // number of annotations.
  map<string, int64> example_count = 1;
}

// Metadata on AnnotatedDataset.
message AnnotatedDatasetMetadata {
  // Specific request configuration used when requesting the labeling task.
  // As a oneof, at most one of the configs below is set.
  oneof annotation_request_config {
    // Configuration for image classification task.
    ImageClassificationConfig image_classification_config = 2;

    // Configuration for image bounding box and bounding poly task.
    BoundingPolyConfig bounding_poly_config = 3;

    // Configuration for image polyline task.
    PolylineConfig polyline_config = 4;

    // Configuration for image segmentation task.
    SegmentationConfig segmentation_config = 5;

    // Configuration for video classification task.
    VideoClassificationConfig video_classification_config = 6;

    // Configuration for video object detection task.
    ObjectDetectionConfig object_detection_config = 7;

    // Configuration for video object tracking task.
    ObjectTrackingConfig object_tracking_config = 8;

    // Configuration for video event labeling task.
    EventConfig event_config = 9;

    // Configuration for text classification task.
    TextClassificationConfig text_classification_config = 10;

    // Configuration for text entity extraction task.
    TextEntityExtractionConfig text_entity_extraction_config = 11;
  }

  // HumanAnnotationConfig used when requesting the human labeling task for this
  // AnnotatedDataset.
  HumanAnnotationConfig human_annotation_config = 1;
}

// An Example is a piece of data and its annotation. For example, an image with
// label "house".
message Example {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/Example"
    pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}/examples/{example}"
  };

  // Output only. The data part of Example. As a oneof, at most one of the
  // payload fields below is set.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of the text content.
    TextPayload text_payload = 6;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 7;
  }

  // Output only. Name of the example, in format of:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}/examples/{example_id}
  string name = 1;

  // Output only. Annotations for the piece of data in Example.
  // One piece of data can have multiple annotations.
  repeated Annotation annotations = 5;
}
