xref: /aosp_15_r20/external/googleapis/google/cloud/datalabeling/v1beta1/evaluation_job.proto (revision d5c09012810ac0c9f33fe448fb6da8260d444cc9)
1// Copyright 2019 Google LLC.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//     http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14//
15
16syntax = "proto3";
17
18package google.cloud.datalabeling.v1beta1;
19
20import "google/api/resource.proto";
21import "google/cloud/datalabeling/v1beta1/dataset.proto";
22import "google/cloud/datalabeling/v1beta1/evaluation.proto";
23import "google/cloud/datalabeling/v1beta1/human_annotation_config.proto";
24import "google/protobuf/timestamp.proto";
25import "google/rpc/status.proto";
26
27option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
28option go_package = "cloud.google.com/go/datalabeling/apiv1beta1/datalabelingpb;datalabelingpb";
29option java_multiple_files = true;
30option java_package = "com.google.cloud.datalabeling.v1beta1";
31option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
32option ruby_package = "Google::Cloud::DataLabeling::V1beta1";
33
// Defines an evaluation job that runs periodically to generate
// [Evaluations][google.cloud.datalabeling.v1beta1.Evaluation]. [Creating an evaluation
// job](/ml-engine/docs/continuous-evaluation/create-job) is the starting point
// for using continuous evaluation.
message EvaluationJob {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/EvaluationJob"
    pattern: "projects/{project}/evaluationJobs/{evaluation_job}"
  };

  // State of the job.
  enum State {
    // Default value. The job state is unknown or unspecified.
    STATE_UNSPECIFIED = 0;

    // The job is scheduled to run at the [configured interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. You
    // can [pause][google.cloud.datalabeling.v1beta1.DataLabelingService.PauseEvaluationJob] or
    // [delete][google.cloud.datalabeling.v1beta1.DataLabelingService.DeleteEvaluationJob] the job.
    //
    // When the job is in this state, it samples prediction input and output
    // from your model version into your BigQuery table as predictions occur.
    SCHEDULED = 1;

    // The job is currently running. When the job runs, Data Labeling Service
    // does several things:
    //
    // 1. If you have configured your job to use Data Labeling Service for
    //    ground truth labeling, the service creates a
    //    [Dataset][google.cloud.datalabeling.v1beta1.Dataset] and a labeling task for all data sampled
    //    since the last time the job ran. Human labelers provide ground truth
    //    labels for your data. Human labeling may take hours, or even days,
    //    depending on how much data has been sampled. The job remains in the
    //    `RUNNING` state during this time, and it can even be running multiple
    //    times in parallel if it gets triggered again (for example 24 hours
    //    later) before the earlier run has completed. When human labelers have
    //    finished labeling the data, the next step occurs.
    //    <br><br>
    //    If you have configured your job to provide your own ground truth
    //    labels, Data Labeling Service still creates a [Dataset][google.cloud.datalabeling.v1beta1.Dataset] for newly
    //    sampled data, but it expects that you have already added ground truth
    //    labels to the BigQuery table by this time. The next step occurs
    //    immediately.
    //
    // 2. Data Labeling Service creates an [Evaluation][google.cloud.datalabeling.v1beta1.Evaluation] by comparing your
    //    model version's predictions with the ground truth labels.
    //
    // If the job remains in this state for a long time, it continues to sample
    // prediction data into your BigQuery table and will run again at the next
    // interval, even if it causes the job to run multiple times in parallel.
    RUNNING = 2;

    // The job is not sampling prediction input and output into your BigQuery
    // table and it will not run according to its schedule. You can
    // [resume][google.cloud.datalabeling.v1beta1.DataLabelingService.ResumeEvaluationJob] the job.
    PAUSED = 3;

    // The job has this state right before it is deleted.
    STOPPED = 4;
  }

  // Output only. After you create a job, Data Labeling Service assigns a name
  // to the job with the following format:
  //
  // "projects/<var>{project_id}</var>/evaluationJobs/<var>{evaluation_job_id}</var>"
  string name = 1;

  // Required. Description of the job. The description can be up to 25,000
  // characters long.
  string description = 2;

  // Output only. Describes the current state of the job.
  State state = 3;

  // Required. Describes the interval at which the job runs. This interval must
  // be at least 1 day, and it is rounded to the nearest day. For example, if
  // you specify a 50-hour interval, the job runs every 2 days.
  //
  // You can provide the schedule in
  // [crontab format](/scheduler/docs/configuring/cron-job-schedules) or in an
  // [English-like
  // format](/appengine/docs/standard/python/config/cronref#schedule_format).
  //
  // Regardless of what you specify, the job will run at 10:00 AM UTC. Only the
  // interval from this schedule is used, not the specific time of day.
  string schedule = 4;

  // Required. The [AI Platform Prediction model
  // version](/ml-engine/docs/prediction-overview) to be evaluated. Prediction
  // input and output is sampled from this model version. When creating an
  // evaluation job, specify the model version in the following format:
  //
  // "projects/<var>{project_id}</var>/models/<var>{model_name}</var>/versions/<var>{version_name}</var>"
  //
  // There can only be one evaluation job per model version.
  string model_version = 5;

  // Required. Configuration details for the evaluation job.
  EvaluationJobConfig evaluation_job_config = 6;

  // Required. Name of the [AnnotationSpecSet][google.cloud.datalabeling.v1beta1.AnnotationSpecSet] describing all the
  // labels that your machine learning model outputs. You must create this
  // resource before you create an evaluation job and provide its name in the
  // following format:
  //
  // "projects/<var>{project_id}</var>/annotationSpecSets/<var>{annotation_spec_set_id}</var>"
  string annotation_spec_set = 7;

  // Required. Whether you want Data Labeling Service to provide ground truth
  // labels for prediction input. If you want the service to assign human
  // labelers to annotate your data, set this to `true`. If you want to provide
  // your own ground truth labels in the evaluation job's BigQuery table, set
  // this to `false`.
  bool label_missing_ground_truth = 8;

  // Output only. Every time the evaluation job runs and an error occurs, the
  // failed attempt is appended to this array.
  repeated Attempt attempts = 9;

  // Output only. Timestamp of when this evaluation job was created.
  google.protobuf.Timestamp create_time = 10;
}
154
// Configures specific details of how a continuous evaluation job works. Provide
// this configuration when you create an EvaluationJob.
message EvaluationJobConfig {
  // Required. Details for how you want human reviewers to provide ground truth
  // labels.
  oneof human_annotation_request_config {
    // Specify this field if your model version performs image classification or
    // general classification.
    //
    // `annotationSpecSet` in this configuration must match
    // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
    // `allowMultiLabel` in this configuration must match
    // `classificationMetadata.isMultiLabel` in [input_config][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config].
    ImageClassificationConfig image_classification_config = 4;

    // Specify this field if your model version performs image object detection
    // (bounding box detection).
    //
    // `annotationSpecSet` in this configuration must match
    // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
    BoundingPolyConfig bounding_poly_config = 5;

    // Specify this field if your model version performs text classification.
    //
    // `annotationSpecSet` in this configuration must match
    // [EvaluationJob.annotationSpecSet][google.cloud.datalabeling.v1beta1.EvaluationJob.annotation_spec_set].
    // `allowMultiLabel` in this configuration must match
    // `classificationMetadata.isMultiLabel` in [input_config][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config].
    TextClassificationConfig text_classification_config = 8;
  }

  // Required. Details for the sampled prediction input. Within this
  // configuration, there are requirements for several fields:
  //
  // * `dataType` must be one of `IMAGE`, `TEXT`, or `GENERAL_DATA`.
  // * `annotationType` must be one of `IMAGE_CLASSIFICATION_ANNOTATION`,
  //   `TEXT_CLASSIFICATION_ANNOTATION`, `GENERAL_CLASSIFICATION_ANNOTATION`,
  //   or `IMAGE_BOUNDING_BOX_ANNOTATION` (image object detection).
  // * If your machine learning model performs classification, you must specify
  //   `classificationMetadata.isMultiLabel`.
  // * You must specify `bigquerySource` (not `gcsSource`).
  InputConfig input_config = 1;

  // Required. Details for calculating evaluation metrics and creating
  // [Evaluations][google.cloud.datalabeling.v1beta1.Evaluation]. If your model version performs image object
  // detection, you must specify the `boundingBoxEvaluationOptions` field within
  // this configuration. Otherwise, provide an empty object for this
  // configuration.
  EvaluationConfig evaluation_config = 2;

  // Optional. Details for human annotation of your data. If you set
  // [labelMissingGroundTruth][google.cloud.datalabeling.v1beta1.EvaluationJob.label_missing_ground_truth] to
  // `true` for this evaluation job, then you must specify this field. If you
  // plan to provide your own ground truth labels, then omit this field.
  //
  // Note that you must create an [Instruction][google.cloud.datalabeling.v1beta1.Instruction] resource before you can
  // specify this field. Provide the name of the instruction resource in the
  // `instruction` field within this configuration.
  HumanAnnotationConfig human_annotation_config = 3;

  // Required. Prediction keys that tell Data Labeling Service where to find the
  // data for evaluation in your BigQuery table. When the service samples
  // prediction input and output from your model version and saves it to
  // BigQuery, the data gets stored as JSON strings in the BigQuery table. These
  // keys tell Data Labeling Service how to parse the JSON.
  //
  // You can provide the following entries in this field:
  //
  // * `data_json_key`: the data key for prediction input. You must provide
  //   either this key or `reference_json_key`.
  // * `reference_json_key`: the data reference key for prediction input. You
  //   must provide either this key or `data_json_key`.
  // * `label_json_key`: the label key for prediction output. Required.
  // * `label_score_json_key`: the score key for prediction output. Required.
  // * `bounding_box_json_key`: the bounding box key for prediction output.
  //   Required if your model version performs image object detection.
  //
  // Learn [how to configure prediction
  // keys](/ml-engine/docs/continuous-evaluation/create-job#prediction-keys).
  map<string, string> bigquery_import_keys = 9;

  // Required. The maximum number of predictions to sample and save to BigQuery
  // during each [evaluation interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. This limit
  // overrides `example_sample_percentage`: even if the service has not sampled
  // enough predictions to fulfill `example_sample_percentage` during an
  // interval, it stops sampling predictions when it meets this limit.
  int32 example_count = 10;

  // Required. Fraction of predictions to sample and save to BigQuery during
  // each [evaluation interval][google.cloud.datalabeling.v1beta1.EvaluationJob.schedule]. For example, 0.1 means
  // 10% of predictions served by your model version get saved to BigQuery.
  double example_sample_percentage = 11;

  // Optional. Configuration details for evaluation job alerts. Specify this
  // field if you want to receive email alerts if the evaluation job finds that
  // your predictions have low mean average precision during a run.
  EvaluationJobAlertConfig evaluation_job_alert_config = 13;
}
253
// Provides details for how an evaluation job sends email alerts based on the
// results of a run.
message EvaluationJobAlertConfig {
  // Required. An email address to send alerts to when a run's mean average
  // precision falls below `min_acceptable_mean_average_precision`.
  string email = 1;

  // Required. A number between 0 and 1 that describes a minimum mean average
  // precision threshold. When the evaluation job runs, if it calculates that
  // your model version's predictions from the recent interval have
  // [meanAveragePrecision][google.cloud.datalabeling.v1beta1.PrCurve.mean_average_precision] below this
  // threshold, then it sends an alert to your specified email.
  double min_acceptable_mean_average_precision = 2;
}
267
// Records a failed evaluation job run.
message Attempt {
  // The time of this attempt to run the evaluation job.
  google.protobuf.Timestamp attempt_time = 1;

  // Details of errors that occurred.
  repeated google.rpc.Status partial_failures = 2;
}
275