// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.vision.v1p3beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1p3beta1/geometry.proto";
import "google/cloud/vision/v1p3beta1/product_search.proto";
import "google/cloud/vision/v1p3beta1/text_annotation.proto";
import "google/cloud/vision/v1p3beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/apiv1p3beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p3beta1";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1p3beta1/images:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }
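
  // For illustration only, not part of the generated API surface: using the
  // standard proto3 JSON mapping, a minimal `BatchAnnotateImagesRequest` body
  // for the HTTP binding above might look like the following (the bucket,
  // object, and feature choice are hypothetical):
  //
  //     {
  //       "requests": [
  //         {
  //           "image": {
  //             "source": { "imageUri": "gs://example-bucket/photo.jpg" }
  //           },
  //           "features": [
  //             { "type": "LABEL_DETECTION", "maxResults": 5 }
  //           ]
  //         }
  //       ]
  //     }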

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p3beta1/files:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
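
  // For illustration only, not part of the generated API surface: once the
  // returned long-running operation finishes, its proto3 JSON form (as read
  // through `google.longrunning.Operations.GetOperation`) might look like the
  // sketch below; the operation name and URI are hypothetical.
  //
  //     {
  //       "name": "operations/abc123",
  //       "metadata": {
  //         "@type": "type.googleapis.com/google.cloud.vision.v1p3beta1.OperationMetadata",
  //         "state": "DONE"
  //       },
  //       "done": true,
  //       "response": {
  //         "@type": "type.googleapis.com/google.cloud.vision.v1p3beta1.AsyncBatchAnnotateFilesResponse",
  //         "responses": [
  //           {
  //             "outputConfig": {
  //               "gcsDestination": { "uri": "gs://example-bucket/out/" }
  //             }
  //           }
  //         ]
  //       }
  //     }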
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}
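
// For illustration only: in proto3 JSON, a `Feature` that selects document
// OCR and pins a hypothetical request to the "builtin/latest" model would be
// written as:
//
//     { "type": "DOCUMENT_TEXT_DETECTION", "model": "builtin/latest" }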

// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if Google
  //    throttles requests to the site for abuse prevention. You should not
  //    depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
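
// For illustration only: because `content` is a `bytes` field, a raw-bytes
// request expressed in proto3 JSON carries the image base64-encoded. A
// hypothetical, truncated JPEG payload would look like:
//
//     { "content": "/9j/4AAQSkZJRgABAQAA..." }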

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale, as returned in `ImageParams`.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}

// Detected entity location information.
message LocationInfo {
  // lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5;

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
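
// For illustration only: in proto3 JSON, a single label `EntityAnnotation`
// in a response might look like the sketch below (the mid and scores are
// hypothetical):
//
//     {
//       "mid": "/m/01yrx",
//       "description": "cat",
//       "score": 0.98,
//       "topicality": 0.98
//     }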

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale, as returned in `ImageParams`.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
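
// For illustration only: requesting crop hints for 16:9 and square crops in
// proto3 JSON. The value 1.77778 is 16/9 rounded the same way as the 4/3
// example above:
//
//     { "aspectRatios": [1.77778, 1.0] }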

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {

  // By default, Cloud Vision API only includes confidence score for
  // DOCUMENT_TEXT_DETECTION result. Set the flag to true to include confidence
  // score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  google.cloud.vision.v1p3beta1.ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}
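
// For illustration only: an `ImageContext` combining an OCR language hint
// with the crop hints parameters defined above, in proto3 JSON (the "ja"
// hint is hypothetical):
//
//     {
//       "languageHints": ["ja"],
//       "cropHintsParams": { "aspectRatios": [1.33333] }
//     }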

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  google.cloud.vision.v1p3beta1.ProductSearchResults product_search_results =
      14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information needed to understand where this image
  // comes from.
  ImageAnnotationContext context = 21;
}

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file.
  repeated AnnotateImageResponse responses = 2;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1 [(google.api.field_behavior) = REQUIRED];
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1 [(google.api.field_behavior) = REQUIRED];
}
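
// For illustration only: a minimal `AsyncBatchAnnotateFilesRequest` in proto3
// JSON, tying together the `InputConfig` and `OutputConfig` messages defined
// below (the bucket names and PDF are hypothetical):
//
//     {
//       "requests": [
//         {
//           "inputConfig": {
//             "gcsSource": { "uri": "gs://example-bucket/document.pdf" },
//             "mimeType": "application/pdf"
//           },
//           "features": [{ "type": "DOCUMENT_TEXT_DETECTION" }],
//           "outputConfig": {
//             "gcsDestination": { "uri": "gs://example-bucket/out/" },
//             "batchSize": 20
//           }
//         }
//       ]
//     }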

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // The type of the file. Currently only "application/pdf" and "image/tiff"
  // are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files each
  // containing 20 response protos will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential future
  // support for other output configurations.
  int32 batch_size = 2;
}
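
// For illustration only: output files are sharded by `batch_size`, so the
// file count works out to ceil(pages / batch_size). A hypothetical 105-page
// PDF with `batch_size` = 20 would yield 6 output files, the last holding
// the remaining 5 response protos.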

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI where the results will be stored. Results will
  // be in JSON format and preceded by its corresponding input URI. This field
  // can either represent a single file, or a prefix for multiple outputs.
  // Prefixes must end in a `/`.
  //
  // Examples:
  //
  // *    File: gs://bucket-name/filename.json
  // *    Prefix: gs://bucket-name/prefix/here/
  // *    File: gs://bucket-name/prefix/here
  //
  // If multiple outputs, each response is still AnnotateFileResponse, each of
  // which contains some subset of the full list of AnnotateImageResponse.
  // Multiple outputs can happen if, for example, the output JSON is too large
  // and overflows into multiple sharded files.
  string uri = 1;
}

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely that the image belongs to the specified vertical.
  VERY_UNLIKELY = 1;

  // It is unlikely that the image belongs to the specified vertical.
  UNLIKELY = 2;

  // It is possible that the image belongs to the specified vertical.
  POSSIBLE = 3;

  // It is likely that the image belongs to the specified vertical.
  LIKELY = 4;

  // It is very likely that the image belongs to the specified vertical.
  VERY_LIKELY = 5;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}