// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/product_search.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }
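
  // Illustrative only, not part of the API surface: a minimal JSON body for
  // `POST /v1/images:annotate`, using the proto3 JSON mapping of the request
  // messages defined below (the bucket and object names are placeholders):
  //
  //     {
  //       "requests": [{
  //         "image": {"source": {"imageUri": "gs://my-bucket/my-image.jpg"}},
  //         "features": [{"type": "LABEL_DETECTION", "maxResults": 10}]
  //       }]
  //     }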

  // Run image detection and annotation for a batch of files. Currently, only
  // "application/pdf", "image/tiff", and "image/gif" are supported.
  //
  // This service extracts at most 5 frames (GIF) or pages (PDF or TIFF) from
  // each file provided (customers can specify which 5 in
  // AnnotateFileRequest.pages) and performs detection and annotation on each
  // extracted image.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest) returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1/files:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }
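
  // As a hedged sketch (placeholders, not a normative example), a
  // `POST /v1/files:annotate` body pairs an `InputConfig` with the requested
  // features, using the messages defined below:
  //
  //     {
  //       "requests": [{
  //         "inputConfig": {
  //           "gcsSource": {"uri": "gs://my-bucket/my-file.pdf"},
  //           "mimeType": "application/pdf"
  //         },
  //         "features": [{"type": "DOCUMENT_TEXT_DETECTION"}]
  //       }]
  //     }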

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's Google Cloud Storage bucket; each JSON file contains a
  // BatchAnnotateImagesResponse proto.
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/images:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }
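
  // A minimal sketch of a `POST /v1/images:asyncBatchAnnotate` body, assuming
  // placeholder bucket names; `outputConfig` uses the `OutputConfig` and
  // `GcsDestination` messages defined below:
  //
  //     {
  //       "requests": [{
  //         "image": {"source": {"imageUri": "gs://my-bucket/my-image.png"}},
  //         "features": [{"type": "LABEL_DETECTION"}]
  //       }],
  //       "outputConfig": {
  //         "gcsDestination": {"uri": "gs://my-bucket/results/"},
  //         "batchSize": 20
  //       }
  //     }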

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely.
  VERY_UNLIKELY = 1;

  // It is unlikely.
  UNLIKELY = 2;

  // It is possible.
  POSSIBLE = 3;

  // It is likely.
  LIKELY = 4;

  // It is very likely.
  VERY_LIKELY = 5;
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}
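
// For illustration, a `Feature` requesting dense document OCR on the weekly
// model described above would serialize in the proto3 JSON mapping as:
//
//     {"type": "DOCUMENT_TEXT_DETECTION", "model": "builtin/weekly"}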

// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if Google
  //    throttles requests to the site for abuse prevention. You should not
  //    depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateImages requests. It does
  // not work for AsyncBatchAnnotateImages requests.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;

      // Left cheek center.
      LEFT_CHEEK_CENTER = 35;

      // Right cheek center.
      RIGHT_CHEEK_CENTER = 36;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}

// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant color detection completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
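
// As a worked example of the encoding above: a 16:9 crop corresponds to
// 16 / 9 = 1.77778 and a 4:3 crop to 1.33333, so a request asking for both
// would set, in the proto3 JSON mapping:
//
//     {"cropHintsParams": {"aspectRatios": [1.77778, 1.33333]}}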

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, the Cloud Vision API only includes a confidence score for the
  // DOCUMENT_TEXT_DETECTION result. Set this flag to true to include a
  // confidence score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}
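
// For illustration (values are placeholders), an `ImageContext` that hints at
// Japanese text and opts into TEXT_DETECTION confidence scores would look like
// this in the proto3 JSON mapping:
//
//     {
//       "languageHints": ["ja"],
//       "textDetectionParams": {"enableTextDetectionConfidenceScore": true}
//     }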

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, provides contextual information about where this image
  // comes from.
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate a single file, e.g. a PDF, TIFF, or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file to perform image annotation on.
  //
  // Pages start at 1; the first page of the file is page 1. At most 5 pages
  // are supported per request. Pages can be negative.
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is a GIF instead of a PDF or TIFF, "page" refers to GIF
  // frames.
  //
  // If this field is empty, the service by default performs image annotation
  // for the first 5 pages of the file.
  repeated int32 pages = 4;
}
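
// For example (an illustrative selection, not a required pattern): annotating
// the first two pages and the last page of a file would set `pages` to
// [1, 2, -1], which stays within the 5-page limit described above.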

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Currently, only one
  // AnnotateFileRequest per BatchAnnotateFilesRequest is supported.
  repeated AnnotateFileRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 3;
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, each response corresponding to each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, such as Japan and Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files, each
  // containing 20 response protos, will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential future
  // support for other output configurations.
  int32 batch_size = 2;
}
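
// To make the sharding arithmetic above concrete: 100 response protos with
// `batch_size` = 20 yield ceil(100 / 20) = 5 output files, while
// `batch_size` = 30 would yield ceil(100 / 30) = 4 files (three containing 30
// responses and one containing the remaining 10).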

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can represent either a Cloud Storage file prefix or a
  // Cloud Storage directory. In either case, the URI should be unique, because
  // to retrieve all of the output files you will need to do a wildcard Cloud
  // Storage search on the URI prefix you provide.
  //
  // Examples:
  //
  // *    File Prefix: gs://bucket-name/here/filenameprefix   The output files
  //      will be created in gs://bucket-name/here/ and the names of the
  //      output files will begin with "filenameprefix".
  //
  // *    Directory Prefix: gs://bucket-name/some/location/   The output files
  //      will be created in gs://bucket-name/some/location/ and the names of
  //      the output files could be anything because there was no filename
  //      prefix specified.
  //
  // If there are multiple outputs, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse messages. Multiple outputs can happen if, for
  // example, the output JSON is too large and overflows into multiple sharded
  // files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}