// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.vision.v1p4beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1p4beta1/face.proto";
import "google/cloud/vision/v1p4beta1/geometry.proto";
import "google/cloud/vision/v1p4beta1/product_search.proto";
import "google/cloud/vision/v1p4beta1/text_annotation.proto";
import "google/cloud/vision/v1p4beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/apiv1p4beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p4beta1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
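  //
  // A minimal example request body in the proto3 JSON representation
  // (illustrative only; the bucket and object names are placeholders):
  //
  //     {
  //       "requests": [{
  //         "image": {"source": {"imageUri": "gs://example-bucket/image.jpg"}},
  //         "features": [{"type": "LABEL_DETECTION", "maxResults": 10}]
  //       }]
  //     }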
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1p4beta1/images:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run image detection and annotation for a batch of files. Currently, only
  // "application/pdf", "image/tiff" and "image/gif" are supported.
  //
  // This service extracts at most 5 frames (GIF) or pages (PDF or TIFF) from
  // each file provided (clients can specify which 5 in
  // `AnnotateFileRequest.pages`) and performs detection and annotation on
  // each extracted image.
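  //
  // A minimal example request body in the proto3 JSON representation
  // (illustrative only; the URI is a placeholder, and `pages` selects the
  // first two pages):
  //
  //     {
  //       "requests": [{
  //         "inputConfig": {
  //           "gcsSource": {"uri": "gs://example-bucket/document.pdf"},
  //           "mimeType": "application/pdf"
  //         },
  //         "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
  //         "pages": [1, 2]
  //       }]
  //     }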
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1p4beta1/files:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's Google Cloud Storage bucket; each JSON file contains a
  // `BatchAnnotateImagesResponse` proto.
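  //
  // A minimal example request body in the proto3 JSON representation
  // (illustrative only; bucket names and the output prefix are placeholders):
  //
  //     {
  //       "requests": [{
  //         "image": {"source": {"imageUri": "gs://example-bucket/image.jpg"}},
  //         "features": [{"type": "LABEL_DETECTION"}]
  //       }],
  //       "outputConfig": {
  //         "gcsDestination": {"uri": "gs://example-bucket/output/"},
  //         "batchSize": 20
  //       }
  //     }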
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p4beta1/images:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p4beta1/files:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
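// For example, in the proto3 JSON representation (illustrative values):
//
//     "features": [
//       {"type": "LABEL_DETECTION", "maxResults": 10},
//       {"type": "SAFE_SEARCH_DETECTION"}
//     ]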
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}

182
183// External image source (Google Cloud Storage or web URL image location).
184message ImageSource {
185  // **Use `image_uri` instead.**
186  //
187  // The Google Cloud Storage  URI of the form
188  // `gs://bucket_name/object_name`. Object versioning is not supported. See
189  // [Google Cloud Storage Request
190  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
191  string gcs_image_uri = 1;
192
193  // The URI of the source image. Can be either:
194  //
195  // 1. A Google Cloud Storage URI of the form
196  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
197  //    [Google Cloud Storage Request
198  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
199  //    info.
200  //
201  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
202  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
203  //    completed. Your request may fail if the specified host denies the
204  //    request (e.g. due to request throttling or DOS prevention), or if Google
205  //    throttles requests to the site for abuse prevention. You should not
206  //    depend on externally-hosted images for production applications.
207  //
208  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
209  // precedence.
210  string image_uri = 2;
211}
212
213// Client image to perform Google Cloud Vision API tasks over.
214message Image {
215  // Image content, represented as a stream of bytes.
216  // Note: As with all `bytes` fields, protobuffers use a pure binary
217  // representation, whereas JSON representations use base64.
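  // For example, a JSON request can carry the image inline as base64
  // (truncated, illustrative value):
  //
  //     "content": "/9j/4AAQSkZJRg..."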
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely.
  VERY_UNLIKELY = 1;

  // It is unlikely.
  UNLIKELY = 2;

  // It is possible.
  POSSIBLE = 3;

  // It is likely.
  LIKELY = 4;

  // It is very likely.
  VERY_LIKELY = 5;
}

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise
  // rotation of the face relative to the image vertical about the axis
  // perpendicular to the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;

  // Additional recognition information. Only computed if
  // image_context.face_recognition_params is provided, **and** a match is
  // found to a [Celebrity][google.cloud.vision.v1p4beta1.Celebrity] in the
  // input [CelebritySet][google.cloud.vision.v1p4beta1.CelebritySet]. This
  // field is sorted in order of decreasing confidence values.
  repeated FaceRecognitionResult recognition_result = 16;
}

// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {

  // By default, Cloud Vision API only includes confidence score for
  // DOCUMENT_TEXT_DETECTION result. Set the flag to true to include confidence
  // score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
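  //
  // For example, `"language_hints": ["en", "fr"]` (illustrative values)
  // suggests English and French as candidate languages.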
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for face recognition.
  FaceRecognitionParams face_recognition_params = 10;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information describing where this image came from
  // (for example, the file and the page within it that produced the image).
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate a single file, e.g. a PDF, TIFF, or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file to perform image annotation on.
  //
  // Page numbering starts at 1; the first page of the file is page 1. At most
  // 5 pages are supported per request. Pages can be negative:
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is a GIF instead of a PDF or TIFF, "page" refers to GIF
  // frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file.
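  //
  // For example, `pages: [1, 2, -1]` (illustrative values) annotates the
  // first page, the second page, and the last page.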
  repeated int32 pages = 4;
}

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Currently, only one
  // AnnotateFileRequest per BatchAnnotateFilesRequest is supported.
  repeated AnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, each response corresponding to each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files each
  // containing 20 response protos will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can represent either a Google Cloud Storage file
  // prefix or a Google Cloud Storage directory. In either case, the URI
  // should be unique, because to retrieve all of the output files you will
  // need to do a wildcard Google Cloud Storage search on the URI prefix you
  // provide.
  //
  // Examples:
  //
  // *   File prefix: `gs://bucket-name/here/filenameprefix`. The output files
  //     will be created in `gs://bucket-name/here/` and the names of the
  //     output files will begin with "filenameprefix".
  //
  // *   Directory prefix: `gs://bucket-name/some/location/`. The output files
  //     will be created in `gs://bucket-name/some/location/` and the names of
  //     the output files could be anything because there was no filename
  //     prefix specified.
  //
  // If multiple outputs are written, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse. Multiple outputs can happen if, for example, the
  // output JSON is too large and overflows into multiple sharded files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}
968