// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.vision.v1p4beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1p4beta1/face.proto";
import "google/cloud/vision/v1p4beta1/geometry.proto";
import "google/cloud/vision/v1p4beta1/product_search.proto";
import "google/cloud/vision/v1p4beta1/text_annotation.proto";
import "google/cloud/vision/v1p4beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/apiv1p4beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p4beta1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
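  //
  // A minimal example request body in the proto3 JSON representation
  // (illustrative only; the bucket and object names are placeholders):
  //
  //     {
  //       "requests": [{
  //         "image": {"source": {"imageUri": "gs://example-bucket/image.jpg"}},
  //         "features": [{"type": "LABEL_DETECTION", "maxResults": 10}]
  //       }]
  //     }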
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1p4beta1/images:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run image detection and annotation for a batch of files. Currently, only
  // "application/pdf", "image/tiff" and "image/gif" are supported.
  //
  // This service extracts at most 5 frames (GIF) or pages (PDF or TIFF) from
  // each file provided (clients can specify which 5 in
  // `AnnotateFileRequest.pages`) and performs detection and annotation on
  // each extracted image.
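  //
  // A minimal example request body in the proto3 JSON representation
  // (illustrative only; the URI is a placeholder, and `pages` selects the
  // first two pages):
  //
  //     {
  //       "requests": [{
  //         "inputConfig": {
  //           "gcsSource": {"uri": "gs://example-bucket/document.pdf"},
  //           "mimeType": "application/pdf"
  //         },
  //         "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
  //         "pages": [1, 2]
  //       }]
  //     }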
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1p4beta1/files:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's Google Cloud Storage bucket; each JSON file contains a
  // `BatchAnnotateImagesResponse` proto.
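  //
  // A minimal example request body in the proto3 JSON representation
  // (illustrative only; bucket names and the output prefix are placeholders):
  //
  //     {
  //       "requests": [{
  //         "image": {"source": {"imageUri": "gs://example-bucket/image.jpg"}},
  //         "features": [{"type": "LABEL_DETECTION"}]
  //       }],
  //       "outputConfig": {
  //         "gcsDestination": {"uri": "gs://example-bucket/output/"},
  //         "batchSize": 20
  //       }
  //     }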
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p4beta1/images:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p4beta1/files:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
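// For example, in the proto3 JSON representation (illustrative values):
//
//     "features": [
//       {"type": "LABEL_DETECTION", "maxResults": 10},
//       {"type": "SAFE_SEARCH_DETECTION"}
//     ]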
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}

182
183// External image source (Google Cloud Storage or web URL image location).
184message ImageSource {
185  // **Use `image_uri` instead.**
186  //
187  // The Google Cloud Storage  URI of the form
188  // `gs://bucket_name/object_name`. Object versioning is not supported. See
189  // [Google Cloud Storage Request
190  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
191  string gcs_image_uri = 1;
192
193  // The URI of the source image. Can be either:
194  //
195  // 1. A Google Cloud Storage URI of the form
196  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
197  //    [Google Cloud Storage Request
198  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
199  //    info.
200  //
201  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
202  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
203  //    completed. Your request may fail if the specified host denies the
204  //    request (e.g. due to request throttling or DOS prevention), or if Google
205  //    throttles requests to the site for abuse prevention. You should not
206  //    depend on externally-hosted images for production applications.
207  //
208  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
209  // precedence.
210  string image_uri = 2;
211}
212
213// Client image to perform Google Cloud Vision API tasks over.
214message Image {
215  // Image content, represented as a stream of bytes.
216  // Note: As with all `bytes` fields, protobuffers use a pure binary
217  // representation, whereas JSON representations use base64.
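  // For example, a JSON request can carry the image inline as base64
  // (truncated, illustrative value):
  //
  //     "content": "/9j/4AAQSkZJRg..."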
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely.
  VERY_UNLIKELY = 1;

  // It is unlikely.
  UNLIKELY = 2;

  // It is possible.
  POSSIBLE = 3;

  // It is likely.
  LIKELY = 4;

  // It is very likely.
  VERY_LIKELY = 5;
}

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise
  // rotation of the face relative to the image vertical about the axis
  // perpendicular to the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;

  // Additional recognition information. Only computed if
  // image_context.face_recognition_params is provided, **and** a match is
  // found to a [Celebrity][google.cloud.vision.v1p4beta1.Celebrity] in the
  // input [CelebritySet][google.cloud.vision.v1p4beta1.CelebritySet]. This
  // field is sorted in order of decreasing confidence values.
  repeated FaceRecognitionResult recognition_result = 16;
}

// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {

  // By default, Cloud Vision API only includes confidence score for
  // DOCUMENT_TEXT_DETECTION result. Set the flag to true to include confidence
  // score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
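  //
  // For example, `"language_hints": ["en", "fr"]` (illustrative values)
  // suggests English and French as candidate languages.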
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for face recognition.
  FaceRecognitionParams face_recognition_params = 10;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information describing where this image came from
  // (for example, the file and the page within it that produced the image).
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate a single file, e.g. a PDF, TIFF, or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file to perform image annotation on.
  //
  // Page numbering starts at 1; the first page of the file is page 1. At most
  // 5 pages are supported per request. Pages can be negative:
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is a GIF instead of a PDF or TIFF, "page" refers to GIF
  // frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file.
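  //
  // For example, `pages: [1, 2, -1]` (illustrative values) annotates the
  // first page, the second page, and the last page.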
  repeated int32 pages = 4;
}

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Currently, only one
  // AnnotateFileRequest per BatchAnnotateFilesRequest is supported.
  repeated AnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, each response corresponding to each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files each
  // containing 20 response protos will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can represent either a Google Cloud Storage file
  // prefix or a Google Cloud Storage directory. In either case, the URI
  // should be unique, because to retrieve all of the output files you will
  // need to do a wildcard Google Cloud Storage search on the URI prefix you
  // provide.
  //
  // Examples:
  //
  // *   File prefix: `gs://bucket-name/here/filenameprefix`. The output files
  //     will be created in `gs://bucket-name/here/` and the names of the
  //     output files will begin with "filenameprefix".
  //
  // *   Directory prefix: `gs://bucket-name/some/location/`. The output files
  //     will be created in `gs://bucket-name/some/location/` and the names of
  //     the output files could be anything because there was no filename
  //     prefix specified.
  //
  // If multiple outputs are written, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse. Multiple outputs can happen if, for example, the
  // output JSON is too large and overflows into multiple sharded files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}
968