// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.datalabeling.v1beta1;

import "google/api/resource.proto";
import "google/cloud/datalabeling/v1beta1/annotation.proto";
import "google/cloud/datalabeling/v1beta1/annotation_spec_set.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DataLabeling.V1Beta1";
option go_package = "cloud.google.com/go/datalabeling/apiv1beta1/datalabelingpb;datalabelingpb";
option java_multiple_files = true;
option java_package = "com.google.cloud.datalabeling.v1beta1";
option php_namespace = "Google\\Cloud\\DataLabeling\\V1beta1";
option ruby_package = "Google::Cloud::DataLabeling::V1beta1";

// Describes an evaluation between a machine learning model's predictions and
// ground truth labels. Created when an
// [EvaluationJob][google.cloud.datalabeling.v1beta1.EvaluationJob] runs
// successfully.
message Evaluation {
  option (google.api.resource) = {
    type: "datalabeling.googleapis.com/Evaluation"
    pattern: "projects/{project}/datasets/{dataset}/evaluations/{evaluation}"
  };

  // Output only. Resource name of an evaluation. The name has the following
  // format:
  //
  // "projects/<var>{project_id}</var>/datasets/<var>{dataset_id}</var>/evaluations/<var>{evaluation_id}</var>"
  string name = 1;

  // Output only. Options used in the evaluation job that created this
  // evaluation.
  EvaluationConfig config = 2;

  // Output only. Timestamp for when the evaluation job that created this
  // evaluation ran.
  google.protobuf.Timestamp evaluation_job_run_time = 3;

  // Output only. Timestamp for when this evaluation was created.
  google.protobuf.Timestamp create_time = 4;

  // Output only. Metrics comparing predictions to ground truth labels.
  EvaluationMetrics evaluation_metrics = 5;

  // Output only. Type of task that the model version being evaluated performs,
  // as defined in the
  //
  // [evaluationJobConfig.inputConfig.annotationType][google.cloud.datalabeling.v1beta1.EvaluationJobConfig.input_config]
  // field of the evaluation job that created this evaluation.
  AnnotationType annotation_type = 6;

  // Output only. The number of items in the ground truth dataset that were used
  // for this evaluation. Only populated when the evaluation is for certain
  // AnnotationTypes.
  int64 evaluated_item_count = 7;
}

// Configuration details used for calculating evaluation metrics and creating an
// [Evaluation][google.cloud.datalabeling.v1beta1.Evaluation].
message EvaluationConfig {
  // Vertical specific options for general metrics.
  oneof vertical_option {
    // Only specify this field if the related model performs image object
    // detection (`IMAGE_BOUNDING_BOX_ANNOTATION`). Describes how to evaluate
    // bounding boxes.
    BoundingBoxEvaluationOptions bounding_box_evaluation_options = 1;
  }
}

// Options regarding evaluation between bounding boxes.
message BoundingBoxEvaluationOptions {
  // Minimum
  // [intersection-over-union
  //
  // (IOU)](/vision/automl/object-detection/docs/evaluate#intersection-over-union)
  // required for 2 bounding boxes to be considered a match. This must be a
  // number between 0 and 1.
  float iou_threshold = 1;
}

// Metrics produced when an [Evaluation][google.cloud.datalabeling.v1beta1.Evaluation]
// compares a model's predictions to ground truth labels. Exactly one of the
// task-specific metrics messages below is set, matching the evaluated model's
// task type.
message EvaluationMetrics {
  // Common metrics covering most general cases.
  oneof metrics {
    // Populated when the evaluated model performs classification.
    ClassificationMetrics classification_metrics = 1;

    // Populated when the evaluated model performs image object detection
    // (bounding boxes).
    ObjectDetectionMetrics object_detection_metrics = 2;
  }
}

// Metrics calculated for a classification model.
message ClassificationMetrics {
  // Precision-recall curve based on ground truth labels, predicted labels, and
  // scores for the predicted labels.
  PrCurve pr_curve = 1;

  // Confusion matrix of predicted labels vs. ground truth labels.
  ConfusionMatrix confusion_matrix = 2;
}

// Metrics calculated for an image object detection (bounding box) model.
message ObjectDetectionMetrics {
  // Precision-recall curve.
  PrCurve pr_curve = 1;
}

// A precision-recall curve: a set of precision/recall points, each computed at
// a different confidence threshold, plus aggregate statistics over the curve.
message PrCurve {
  // A single point on the precision-recall curve: the precision, recall, and
  // derived statistics computed at one `confidence_threshold`.
  message ConfidenceMetricsEntry {
    // Threshold used for this entry.
    //
    // For classification tasks, this is a classification threshold: a
    // predicted label is categorized as positive or negative (in the context of
    // this point on the PR curve) based on whether the label's score meets this
    // threshold.
    //
    // For image object detection (bounding box) tasks, this is the
    // [intersection-over-union
    //
    // (IOU)](/vision/automl/object-detection/docs/evaluate#intersection-over-union)
    // threshold for the context of this point on the PR curve.
    float confidence_threshold = 1;

    // Recall value.
    float recall = 2;

    // Precision value.
    float precision = 3;

    // Harmonic mean of recall and precision.
    float f1_score = 4;

    // Recall value for entries with label that has highest score.
    float recall_at1 = 5;

    // Precision value for entries with label that has highest score.
    float precision_at1 = 6;

    // The harmonic mean of [recall_at1][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.recall_at1] and [precision_at1][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.precision_at1].
    float f1_score_at1 = 7;

    // Recall value for entries with label that has highest 5 scores.
    float recall_at5 = 8;

    // Precision value for entries with label that has highest 5 scores.
    float precision_at5 = 9;

    // The harmonic mean of [recall_at5][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.recall_at5] and [precision_at5][google.cloud.datalabeling.v1beta1.PrCurve.ConfidenceMetricsEntry.precision_at5].
    float f1_score_at5 = 10;
  }

  // The annotation spec of the label for which the precision-recall curve is
  // calculated. If this field is empty, that means the precision-recall curve
  // is an aggregate curve for all labels.
  AnnotationSpec annotation_spec = 1;

  // Area under the precision-recall curve. Not to be confused with area under
  // a receiver operating characteristic (ROC) curve.
  float area_under_curve = 2;

  // Entries that make up the precision-recall graph. Each entry is a "point" on
  // the graph drawn for a different `confidence_threshold`.
  repeated ConfidenceMetricsEntry confidence_metrics_entries = 3;

  // Mean average precision of this curve.
  float mean_average_precision = 4;
}

// Confusion matrix of the model running the classification. Only applicable
// when the metrics entry aggregates multiple labels. Not applicable when the
// entry is for a single label.
message ConfusionMatrix {
  // A single cell of the confusion matrix: how many items with a given ground
  // truth label (the parent `Row`) were predicted to have this entry's label.
  message ConfusionMatrixEntry {
    // The annotation spec of a predicted label.
    AnnotationSpec annotation_spec = 1;

    // Number of items predicted to have this label. (The ground truth label for
    // these items is the `Row.annotationSpec` of this entry's parent.)
    int32 item_count = 2;
  }

  // A row in the confusion matrix. Each entry in this row has the same
  // ground truth label.
  message Row {
    // The annotation spec of the ground truth label for this row.
    AnnotationSpec annotation_spec = 1;

    // A list of the confusion matrix entries. One entry for each possible
    // predicted label.
    repeated ConfusionMatrixEntry entries = 2;
  }

  // The rows of the confusion matrix, one per ground truth label.
  repeated Row row = 1;
}