// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/api/resource.proto";
import "google/cloud/datalabeling/v1beta1/annotation.proto";
import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
import "google/cloud/datalabeling/v1beta1/data_payloads.proto";
import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
option go_package = "cloud.google.com/go/datalabeling/apiv1beta1/datalabelingpb;datalabelingpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
option ruby_package = "Google::Cloud::DataLabeling::V1beta1";

// Data type of the input data; see `InputConfig.data_type`.
enum DataType {
  // Data type is unspecified.
  DATA_TYPE_UNSPECIFIED = 0;

  // Image data. Allowed for continuous evaluation.
  IMAGE = 1;

  // Video data.
  VIDEO = 2;

  // Text data. Allowed for continuous evaluation.
  TEXT = 4;

  // Allowed for continuous evaluation.
  GENERAL_DATA = 6;
}

// Dataset is the resource to hold your data. You can request multiple labeling
// tasks for a dataset while each one will generate an AnnotatedDataset.
message Dataset {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/Dataset"
    pattern: "projects/{project}/datasets/{dataset}"
  };

  // Output only. Dataset resource name, format is:
  // projects/{project_id}/datasets/{dataset_id}
  string name = 1;

  // Required. The display name of the dataset. Maximum of 64 characters.
  string display_name = 2;

  // Optional. User-provided description of the dataset.
  // The description can be up to 10000 characters long.
  string description = 3;

  // Output only. Time the dataset was created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. This is populated with the original input configs
  // where ImportData is called. It is available only after the clients
  // import data to this dataset.
  repeated InputConfig input_configs = 5;

  // Output only. The names of any related resources that are blocking changes
  // to the dataset.
  repeated string blocking_resources = 6;

  // Output only. The number of data items in the dataset.
  int64 data_item_count = 7;
}

// The configuration of input data, including data type, location, etc.
message InputConfig {
  // Optional. The metadata associated with each data type.
  oneof data_type_metadata {
    // Required for text import, as language code must be specified.
    TextMetadata text_metadata = 6;
  }

  // Required. Where the data is from.
  oneof source {
    // Source located in Cloud Storage.
    GcsSource gcs_source = 2;

    // Source located in BigQuery. You must specify this field if you are using
    // this InputConfig in an
    // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
    BigQuerySource bigquery_source = 5;
  }

  // Required. Data type must be specified when user tries to import data.
  DataType data_type = 1;

  // Optional. The type of annotation to be performed on this data. You must
  // specify this field if you are using this InputConfig in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob].
  AnnotationType annotation_type = 3;

  // Optional. Metadata about annotations for the input. You must specify this
  // field if you are using this InputConfig in an
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob] for a
  // model version that performs classification.
  ClassificationMetadata classification_metadata = 4;
}

// Metadata for the text.
message TextMetadata {
  // The language of this text, as a
  // [BCP-47](https://www.rfc-editor.org/rfc/bcp/bcp47.txt) language code.
  // Default value is en-US.
  string language_code = 1;
}

// Metadata for classification annotations.
message ClassificationMetadata {
  // Whether the classification task is multi-label or not.
  bool is_multi_label = 1;
}

// Source of the Cloud Storage file to be imported.
message GcsSource {
  // Required. The input URI of source file. This must be a Cloud Storage path
  // (`gs://...`).
  string input_uri = 1;

  // Required. The format of the source file. Only "text/csv" is supported.
  string mime_type = 2;
}

// The BigQuery location for input data. If used in an
// [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob], this is
// where the service saves the prediction input and output sampled from the
// model version.
message BigQuerySource {
  // Required. BigQuery URI to a table, up to 2,000 characters long. If you
  // specify the URI of a table that does not exist, Data Labeling Service
  // creates a table at the URI with the correct schema when you create your
  // [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob]. If you
  // specify the URI of a table that already exists, it must have the
  // [correct
  // schema](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
  //
  // Provide the table URI in the following format:
  //
  // "bq://<var>{your_project_id}</var>/<var>{your_dataset_name}</var>/<var>{your_table_name}</var>"
  //
  // [Learn
  // more](/ml-engine/docs/continuous-evaluation/create-job#table-schema).
  string input_uri = 1;
}

// The configuration of output data.
message OutputConfig {
  // Required. Location to output data to.
  oneof destination {
    // Output to a file in Cloud Storage. Should be used for labeling output
    // other than image segmentation.
    GcsDestination gcs_destination = 1;

    // Output to a folder in Cloud Storage. Should be used for image
    // segmentation labeling output.
    GcsFolderDestination gcs_folder_destination = 2;
  }
}

// Export destination of the data. Only a Cloud Storage path is allowed in
// `output_uri`.
message GcsDestination {
  // Required. The output URI of destination file.
  string output_uri = 1;

  // Required. The format of the Cloud Storage destination. Only "text/csv" and
  // "application/json" are supported.
  string mime_type = 2;
}

// Export folder destination of the data.
message GcsFolderDestination {
  // Required. Cloud Storage directory to export data to.
  string output_folder_uri = 1;
}

// DataItem is a piece of data, without annotation. For example, an image.
message DataItem {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/DataItem"
    pattern: "projects/{project}/datasets/{dataset}/dataItems/{data_item}"
  };

  // Output only. The data payload of this item; at most one member is set.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of text content.
    TextPayload text_payload = 3;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 4;
  }

  // Output only. Name of the data item, in format of:
  // projects/{project_id}/datasets/{dataset_id}/dataItems/{data_item_id}
  string name = 1;
}

// AnnotatedDataset is a set holding annotations for data in a Dataset. Each
// labeling task will generate an AnnotatedDataset under the Dataset that the
// task is requested for.
message AnnotatedDataset {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/AnnotatedDataset"
    pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}"
  };

  // Output only. AnnotatedDataset resource name in format of:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}
  string name = 1;

  // Output only. The display name of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when user starts a labeling task. Maximum of 64
  // characters.
  string display_name = 2;

  // Output only. The description of the AnnotatedDataset. It is specified in
  // HumanAnnotationConfig when user starts a labeling task. Maximum of 10000
  // characters.
  string description = 9;

  // Output only. Source of the annotation.
  AnnotationSource annotation_source = 3;

  // Output only. Type of the annotation. It is specified when starting labeling
  // task.
  AnnotationType annotation_type = 8;

  // Output only. Number of examples in the annotated dataset.
  int64 example_count = 4;

  // Output only. Number of examples that have annotation in the annotated
  // dataset.
  int64 completed_example_count = 5;

  // Output only. Per label statistics.
  LabelStats label_stats = 6;

  // Output only. Time the AnnotatedDataset was created.
  google.protobuf.Timestamp create_time = 7;

  // Output only. Additional information about AnnotatedDataset.
  AnnotatedDatasetMetadata metadata = 10;

  // Output only. The names of any related resources that are blocking changes
  // to the annotated dataset.
  repeated string blocking_resources = 11;
}

// Statistics about annotation specs.
message LabelStats {
  // Map of each annotation spec's example count. Key is the annotation spec
  // name and value is the number of examples for that annotation spec.
  // If the annotated dataset does not have annotation spec, the map will return
  // a pair where the key is empty string and value is the total number of
  // annotations.
  map<string, int64> example_count = 1;
}

// Metadata on AnnotatedDataset.
message AnnotatedDatasetMetadata {
  // Specific request configuration used when requesting the labeling task.
  oneof annotation_request_config {
    // Configuration for image classification task.
    ImageClassificationConfig image_classification_config = 2;

    // Configuration for image bounding box and bounding poly task.
    BoundingPolyConfig bounding_poly_config = 3;

    // Configuration for image polyline task.
    PolylineConfig polyline_config = 4;

    // Configuration for image segmentation task.
    SegmentationConfig segmentation_config = 5;

    // Configuration for video classification task.
    VideoClassificationConfig video_classification_config = 6;

    // Configuration for video object detection task.
    ObjectDetectionConfig object_detection_config = 7;

    // Configuration for video object tracking task.
    ObjectTrackingConfig object_tracking_config = 8;

    // Configuration for video event labeling task.
    EventConfig event_config = 9;

    // Configuration for text classification task.
    TextClassificationConfig text_classification_config = 10;

    // Configuration for text entity extraction task.
    TextEntityExtractionConfig text_entity_extraction_config = 11;
  }

  // HumanAnnotationConfig used when requesting the human labeling task for this
  // AnnotatedDataset.
  HumanAnnotationConfig human_annotation_config = 1;
}

// An Example is a piece of data and its annotation. For example, an image with
// label "house".
message Example {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/Example"
    pattern: "projects/{project}/datasets/{dataset}/annotatedDatasets/{annotated_dataset}/examples/{example}"
  };

  // Output only. The data part of Example.
  oneof payload {
    // The image payload, a container of the image bytes/uri.
    ImagePayload image_payload = 2;

    // The text payload, a container of the text content.
    TextPayload text_payload = 6;

    // The video payload, a container of the video uri.
    VideoPayload video_payload = 7;
  }

  // Output only. Name of the example, in format of:
  // projects/{project_id}/datasets/{dataset_id}/annotatedDatasets/
  // {annotated_dataset_id}/examples/{example_id}
  string name = 1;

  // Output only. Annotations for the piece of data in Example.
  // One piece of data can have multiple annotations.
  repeated Annotation annotations = 5;
}