// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/product_search.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }
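
  // A minimal sketch of a JSON request body for the `/v1/images:annotate`
  // binding above, using the proto3 JSON mapping of
  // BatchAnnotateImagesRequest (the bucket, object, and feature choices are
  // illustrative, not prescribed by this API):
  //
  //   {
  //     "requests": [{
  //       "image": {"source": {"imageUri": "gs://example-bucket/photo.jpg"}},
  //       "features": [{"type": "LABEL_DETECTION", "maxResults": 10}]
  //     }]
  //   }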

  // Run image detection and annotation for a batch of files. Currently, only
  // "application/pdf", "image/tiff" and "image/gif" are supported.
  //
  // This service extracts at most 5 frames (for GIF) or pages (for PDF or
  // TIFF) from each file provided (customers can specify which 5 in
  // AnnotateFileRequest.pages) and performs detection and annotation on each
  // extracted image.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1/files:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's GCS bucket, with each JSON file containing one
  // BatchAnnotateImagesResponse proto.
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/images:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}
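
// The async RPCs above return a google.longrunning.Operation; clients
// typically poll google.longrunning.Operations.GetOperation until `done` is
// true. A sketch of a completed operation in JSON (the operation name is
// illustrative):
//
//   {
//     "name": "operations/abc123",
//     "metadata": {
//       "@type": "type.googleapis.com/google.cloud.vision.v1.OperationMetadata",
//       "state": "DONE"
//     },
//     "done": true,
//     "response": {
//       "@type": "type.googleapis.com/google.cloud.vision.v1.AsyncBatchAnnotateFilesResponse"
//     }
//   }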

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely.
  VERY_UNLIKELY = 1;

  // It is unlikely.
  UNLIKELY = 2;

  // It is possible.
  POSSIBLE = 3;

  // It is likely.
  LIKELY = 4;

  // It is very likely.
  VERY_LIKELY = 5;
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}
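
// A sketch of a `features` list in JSON; omitting `model` falls back to
// "builtin/stable" as documented above (the feature choices are
// illustrative):
//
//   [
//     {"type": "TEXT_DETECTION"},
//     {"type": "LABEL_DETECTION", "maxResults": 5, "model": "builtin/stable"}
//   ]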

// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if Google
  //    throttles requests to the site for abuse prevention. You should not
  //    depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateImages requests. It does
  // not work for AsyncBatchAnnotateImages requests.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
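
// Two sketches of an `Image` in JSON, showing the bytes-versus-URI choice
// described above (the base64 payload and URI are illustrative):
//
//   {"content": "iVBORw0KGgoAAAANSUhEUg..."}
//   {"source": {"imageUri": "gs://example-bucket/photo.png"}}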

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;

      // Left cheek center.
      LEFT_CHEEK_CENTER = 35;

      // Right cheek center.
      RIGHT_CHEEK_CENTER = 36;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}
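
// An abbreviated sketch of a FaceAnnotation in a JSON response, showing how
// the angle and likelihood fields above surface (all values illustrative):
//
//   {
//     "rollAngle": -2.1,
//     "panAngle": 12.5,
//     "tiltAngle": 0.3,
//     "detectionConfidence": 0.98,
//     "joyLikelihood": "VERY_LIKELY",
//     "headwearLikelihood": "UNLIKELY"
//   }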

// Detected entity location information.
message LocationInfo {
  // lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
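
// A sketch of a label EntityAnnotation in JSON (the mid and values are
// illustrative); note that `boundingPoly` is absent for LABEL_DETECTION, as
// documented above:
//
//   {"mid": "/m/01yrx", "description": "cat", "score": 0.97, "topicality": 0.97}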

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content. Violent content may
  // include death, serious harm, or injury to individuals or groups of
  // individuals.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
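
// A sketch of CropHintsParams in JSON requesting 4:3 and 16:9 crops
// (1.33333 and 1.77778, per the width-to-height convention described above):
//
//   {"aspectRatios": [1.33333, 1.77778]}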

// Parameters for web detection request.
message WebDetectionParams {
  // This field has no effect on results.
  bool include_geo_results = 2 [deprecated = true];
}

// Parameters for text detection. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, Cloud Vision API only includes a confidence score for
  // DOCUMENT_TEXT_DETECTION results. Set the flag to true to include a
  // confidence score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to further fine-tune OCR behavior.
  // Current valid values are:
  //
  // - `legacy_layout`: a heuristics-based layout detection algorithm, which
  // serves as an alternative to the current ML-based layout detection
  // algorithm. Customers can choose the layout algorithm best suited to
  // their situation.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}
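
// A combined sketch of an ImageContext in JSON, exercising several of the
// parameter messages above (the hint and option values are illustrative):
//
//   {
//     "languageHints": ["en", "fr"],
//     "cropHintsParams": {"aspectRatios": [1.0]},
//     "textDetectionParams": {
//       "enableTextDetectionConfidenceScore": true,
//       "advancedOcrOptions": ["legacy_layout"]
//     }
//   }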

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information about the source of this image (for
  // example, the file and page it was extracted from).
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate one single file, e.g. a PDF, TIFF or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file to perform image annotation on.
  //
  // Pages start from 1; the first page of the file is page 1. At most 5
  // pages are supported per request. Pages can be negative:
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is a GIF instead of a PDF or TIFF, "page" refers to GIF
  // frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file.
  repeated int32 pages = 4;
}
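
// A sketch of an AnnotateFileRequest in JSON selecting the first two pages
// and the last page, per the negative-index convention above (the bucket
// and object are illustrative):
//
//   {
//     "inputConfig": {
//       "gcsSource": {"uri": "gs://example-bucket/doc.pdf"},
//       "mimeType": "application/pdf"
//     },
//     "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
//     "pages": [1, 2, -1]
//   }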

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Currently, only one
  // AnnotateFileRequest per BatchAnnotateFilesRequest is supported.
  repeated AnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 3;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, each response corresponding to each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protobuffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files each
  // containing 20 response protos will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential future
  // support for other output configurations.
  int32 batch_size = 2;
}
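
// A sketch of an OutputConfig in JSON matching the worked example above:
// with 100 response protos and a batchSize of 20, five output JSON files
// would be written under the (illustrative) prefix:
//
//   {
//     "gcsDestination": {"uri": "gs://example-bucket/results/prefix"},
//     "batchSize": 20
//   }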

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can represent either a GCS file prefix or a GCS
  // directory. In either case, the URI should be unique, because to get all
  // of the output files you will need to do a wildcard GCS search on the URI
  // prefix you provide.
  //
  // Examples:
  //
  // *    File Prefix: gs://bucket-name/here/filenameprefix   The output files
  //      will be created in gs://bucket-name/here/ and the names of the
  //      output files will begin with "filenameprefix".
  //
  // *    Directory Prefix: gs://bucket-name/some/location/   The output files
  //      will be created in gs://bucket-name/some/location/ and the names of
  //      the output files could be anything because there was no filename
  //      prefix specified.
  //
  // If there are multiple outputs, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse. Multiple outputs can happen if, for example, the
  // output JSON is too large and overflows into multiple sharded files.
  string uri = 1;
}
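
// As noted above, collecting all output files means a wildcard listing on
// the URI prefix. A sketch using the gsutil CLI (bucket and prefix are
// illustrative):
//
//   gsutil ls "gs://bucket-name/here/filenameprefix*"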

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}