// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/api/resource.proto";
import "google/cloud/datalabeling/v1beta1/dataset.proto";
import "google/cloud/datalabeling/v1beta1/evaluation.proto";
import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
option go_package = "cloud.google.com/go/datalabeling/apiv1beta1/datalabelingpb;datalabelingpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
option ruby_package = "Google::Cloud::DataLabeling::V1beta1";

// Defines an evaluation job that runs periodically to generate
// [Evaluations][google.cloud.datalabeling.v1beta1.Evaluation]. [Creating an evaluation
// job](/ml-engine/docs/continuous-evaluation/create-job) is the starting point
// for using continuous evaluation.
message EvaluationJob {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/EvaluationJob"
    pattern: "projects/{project}/evaluationJobs/{evaluation_job}"
  };

  // State of the job.
  enum State {
    // Default value. The job state is unspecified.
    STATE_UNSPECIFIED = 0;

    // The job is scheduled to run at the [configured interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. You
    // can [pause][google.cloud.datalabeling.v1beta1.DataLabelingService.PauseEvaluationJob] or
    // [delete][google.cloud.datalabeling.v1beta1.DataLabelingService.DeleteEvaluationJob] the job.
    //
    // When the job is in this state, it samples prediction input and output
    // from your model version into your BigQuery table as predictions occur.
    SCHEDULED = 1;

    // The job is currently running. When the job runs, Data Labeling Service
    // does several things:
    //
    // 1. If you have configured your job to use Data Labeling Service for
    //    ground truth labeling, the service creates a
    //    [Dataset][google.cloud.datalabeling.v1beta1.Dataset] and a labeling task for all data sampled
    //    since the last time the job ran. Human labelers provide ground truth
    //    labels for your data. Human labeling may take hours, or even days,
    //    depending on how much data has been sampled. The job remains in the
    //    `RUNNING` state during this time, and it can even be running multiple
    //    times in parallel if it gets triggered again (for example 24 hours
    //    later) before the earlier run has completed. When human labelers have
    //    finished labeling the data, the next step occurs.
    //    <br><br>
    //    If you have configured your job to provide your own ground truth
    //    labels, Data Labeling Service still creates a [Dataset][google.cloud.datalabeling.v1beta1.Dataset] for newly
    //    sampled data, but it expects that you have already added ground truth
    //    labels to the BigQuery table by this time. The next step occurs
    //    immediately.
    //
    // 2. Data Labeling Service creates an [Evaluation][google.cloud.datalabeling.v1beta1.Evaluation] by comparing your
    //    model version's predictions with the ground truth labels.
    //
    // If the job remains in this state for a long time, it continues to sample
    // prediction data into your BigQuery table and will run again at the next
    // interval, even if it causes the job to run multiple times in parallel.
    RUNNING = 2;

    // The job is not sampling prediction input and output into your BigQuery
    // table and it will not run according to its schedule. You can
    // [resume][google.cloud.datalabeling.v1beta1.DataLabelingService.ResumeEvaluationJob] the job.
    PAUSED = 3;

    // The job has this state right before it is deleted.
    STOPPED = 4;
  }

  // Output only. After you create a job, Data Labeling Service assigns a name
  // to the job with the following format:
  //
  // "projects/<var>{project_id}</var>/evaluationJobs/<var>{evaluation_job_id}</var>"
  string name = 1;

  // Required. Description of the job. The description can be up to 25,000
  // characters long.
  string description = 2;

  // Output only. Describes the current state of the job.
  State state = 3;

  // Required. Describes the interval at which the job runs. This interval must
  // be at least 1 day, and it is rounded to the nearest day. For example, if
  // you specify a 50-hour interval, the job runs every 2 days.
  //
  // You can provide the schedule in
  // [crontab format](/scheduler/docs/configuring/cron-job-schedules) or in an
  // [English-like
  // format](/appengine/docs/standard/python/config/cronref#schedule_format).
  //
  // Regardless of what you specify, the job will run at 10:00 AM UTC. Only the
  // interval from this schedule is used, not the specific time of day.
  string schedule = 4;

  // Required. The [AI Platform Prediction model
  // version](/ml-engine/docs/prediction-overview) to be evaluated. Prediction
  // input and output is sampled from this model version. When creating an
  // evaluation job, specify the model version in the following format:
  //
  // "projects/<var>{project_id}</var>/models/<var>{model_name}</var>/versions/<var>{version_name}</var>"
  //
  // There can only be one evaluation job per model version.
  string model_version = 5;

  // Required. Configuration details for the evaluation job.
  EvaluationJobConfig evaluation_job_config = 6;

  // Required. Name of the [AnnotationSpecSet][google.cloud.datalabeling.v1beta1.AnnotationSpecSet] describing all the
  // labels that your machine learning model outputs. You must create this
  // resource before you create an evaluation job and provide its name in the
  // following format:
  //
  // "projects/<var>{project_id}</var>/annotationSpecSets/<var>{annotation_spec_set_id}</var>"
  string annotation_spec_set = 7;

  // Required. Whether you want Data Labeling Service to provide ground truth
  // labels for prediction input. If you want the service to assign human
  // labelers to annotate your data, set this to `true`. If you want to provide
  // your own ground truth labels in the evaluation job's BigQuery table, set
  // this to `false`.
  bool label_missing_ground_truth = 8;

  // Output only. Every time the evaluation job runs and an error occurs, the
  // failed attempt is appended to this array.
  repeated Attempt attempts = 9;

  // Output only. Timestamp of when this evaluation job was created.
  google.protobuf.Timestamp create_time = 10;
}

// Configures specific details of how a continuous evaluation job works. Provide
// this configuration when you create an EvaluationJob.
message EvaluationJobConfig {
  // Required. Details for how you want human reviewers to provide ground truth
  // labels.
  oneof human_annotation_request_config {
    // Specify this field if your model version performs image classification or
    // general classification.
    //
    // `annotationSpecSet` in this configuration must match
    // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
    // `allowMultiLabel` in this configuration must match
    // `classificationMetadata.isMultiLabel` in [input_config][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config].
    ImageClassificationConfig image_classification_config = 4;

    // Specify this field if your model version performs image object detection
    // (bounding box detection).
    //
    // `annotationSpecSet` in this configuration must match
    // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
    BoundingPolyConfig bounding_poly_config = 5;

    // Specify this field if your model version performs text classification.
    //
    // `annotationSpecSet` in this configuration must match
    // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
    // `allowMultiLabel` in this configuration must match
    // `classificationMetadata.isMultiLabel` in [input_config][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config].
    TextClassificationConfig text_classification_config = 8;
  }

  // Required. Details for the sampled prediction input. Within this
  // configuration, there are requirements for several fields:
  //
  // * `dataType` must be one of `IMAGE`, `TEXT`, or `GENERAL_DATA`.
  // * `annotationType` must be one of `IMAGE_CLASSIFICATION_ANNOTATION`,
  //   `TEXT_CLASSIFICATION_ANNOTATION`, `GENERAL_CLASSIFICATION_ANNOTATION`,
  //   or `IMAGE_BOUNDING_BOX_ANNOTATION` (image object detection).
  // * If your machine learning model performs classification, you must specify
  //   `classificationMetadata.isMultiLabel`.
  // * You must specify `bigquerySource` (not `gcsSource`).
  InputConfig input_config = 1;

  // Required. Details for calculating evaluation metrics and creating
  // [Evaluations][google.cloud.datalabeling.v1beta1.Evaluation]. If your model version performs image object
  // detection, you must specify the `boundingBoxEvaluationOptions` field within
  // this configuration. Otherwise, provide an empty object for this
  // configuration.
  EvaluationConfig evaluation_config = 2;

  // Optional. Details for human annotation of your data. If you set
  // [labelMissingGroundTruth][google.cloud.datalabeling.v1beta1.EvaluationJob.label_missing_ground_truth] to
  // `true` for this evaluation job, then you must specify this field. If you
  // plan to provide your own ground truth labels, then omit this field.
  //
  // Note that you must create an [Instruction][google.cloud.datalabeling.v1beta1.Instruction] resource before you can
  // specify this field. Provide the name of the instruction resource in the
  // `instruction` field within this configuration.
  HumanAnnotationConfig human_annotation_config = 3;

  // Required. Prediction keys that tell Data Labeling Service where to find the
  // data for evaluation in your BigQuery table. When the service samples
  // prediction input and output from your model version and saves it to
  // BigQuery, the data gets stored as JSON strings in the BigQuery table. These
  // keys tell Data Labeling Service how to parse the JSON.
  //
  // You can provide the following entries in this field:
  //
  // * `data_json_key`: the data key for prediction input. You must provide
  //   either this key or `reference_json_key`.
  // * `reference_json_key`: the data reference key for prediction input. You
  //   must provide either this key or `data_json_key`.
  // * `label_json_key`: the label key for prediction output. Required.
  // * `label_score_json_key`: the score key for prediction output. Required.
  // * `bounding_box_json_key`: the bounding box key for prediction output.
  //   Required if your model version performs image object detection.
  //
  // Learn [how to configure prediction
  // keys](/ml-engine/docs/continuous-evaluation/create-job#prediction-keys).
  map<string, string> bigquery_import_keys = 9;

  // Required. The maximum number of predictions to sample and save to BigQuery
  // during each [evaluation interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. This limit
  // overrides `example_sample_percentage`: even if the service has not sampled
  // enough predictions to fulfill `example_sample_percentage` during an
  // interval, it stops sampling predictions when it meets this limit.
  int32 example_count = 10;

  // Required. Fraction of predictions to sample and save to BigQuery during
  // each [evaluation interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. For example, 0.1 means
  // 10% of predictions served by your model version get saved to BigQuery.
  double example_sample_percentage = 11;

  // Optional. Configuration details for evaluation job alerts. Specify this
  // field if you want to receive email alerts if the evaluation job finds that
  // your predictions have low mean average precision during a run.
  EvaluationJobAlertConfig evaluation_job_alert_config = 13;
}

// Provides details for how an evaluation job sends email alerts based on the
// results of a run.
message EvaluationJobAlertConfig {
  // Required. An email address to send alerts to.
  string email = 1;

  // Required. A number between 0 and 1 that describes a minimum mean average
  // precision threshold. When the evaluation job runs, if it calculates that
  // your model version's predictions from the recent interval have
  // [meanAveragePrecision][google.cloud.datalabeling.v1beta1.PrCurve.mean_average_precision] below this
  // threshold, then it sends an alert to your specified email.
  double min_acceptable_mean_average_precision = 2;
}

// Records a failed evaluation job run.
message Attempt {
  // Timestamp of when the failed run occurred.
  google.protobuf.Timestamp attempt_time = 1;

  // Details of errors that occurred.
  repeated google.rpc.Status partial_failures = 2;
}