// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.vision.v1p2beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1p2beta1/geometry.proto";
import "google/cloud/vision/v1p2beta1/text_annotation.proto";
import "google/cloud/vision/v1p2beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/apiv1p2beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p2beta1";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1p2beta1/images:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run async image detection and annotation for a list of generic files (e.g.
  // PDF) which may contain multiple pages and multiple images per page.
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p2beta1/files:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}
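
// Illustrative note (not part of the schema): given the `google.api.http`
// annotations above and the `google.api.default_host` option, the JSON/HTTP
// form of these RPCs resolves to
//
//   POST https://vision.googleapis.com/v1p2beta1/images:annotate
//   POST https://vision.googleapis.com/v1p2beta1/files:asyncBatchAnnotate
//
// with the request message serialized as the JSON request body.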

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}
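
// Illustrative example (not part of the schema): a `Feature` in proto text
// format requesting at most ten label results from the default stable model:
//
//   type: LABEL_DETECTION
//   max_results: 10
//   model: "builtin/stable"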

// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if Google
  //    throttles requests to the site for abuse prevention. You should not
  //    depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}
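
// Illustrative example (not part of the schema): an `ImageSource` in proto
// text format using the preferred `image_uri` field; the bucket and object
// names are placeholders:
//
//   image_uri: "gs://my-bucket/images/input.png"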

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
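
// Illustrative example (not part of the schema): an `Image` in proto text
// format that references a stored object instead of inlining `content` bytes
// (the URI is a placeholder):
//
//   source {
//     image_uri: "gs://my-bucket/images/input.png"
//   }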

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale, as returned in `ImageParams`.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise rotation
  // of the face relative to the image vertical about the axis perpendicular to
  // the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}

// Detected entity location information.
message LocationInfo {
  // lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5;

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale, as returned in `ImageParams`.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
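
// Illustrative example (not part of the schema): `CropHintsParams` in proto
// text format requesting 4:3 and 16:9 crops (4/3 ≈ 1.33333, 16/9 ≈ 1.77778):
//
//   aspect_ratios: 1.33333
//   aspect_ratios: 1.77778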

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {

  // By default, the Cloud Vision API only includes a confidence score for
  // DOCUMENT_TEXT_DETECTION results. Set the flag to true to include a
  // confidence score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is known,
  // setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}
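
// Illustrative example (not part of the schema): an `ImageContext` in proto
// text format hinting that the text is Italian and requesting a 16:9 crop:
//
//   language_hints: "it"
//   crop_hints_params {
//     aspect_ratios: 1.77778
//   }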

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}
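
// Illustrative example (not part of the schema): a complete
// `AnnotateImageRequest` in proto text format (URIs are placeholders):
//
//   image {
//     source { image_uri: "gs://my-bucket/images/input.png" }
//   }
//   features { type: LABEL_DETECTION max_results: 10 }
//   features { type: TEXT_DETECTION }
//   image_context { language_hints: "en" }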

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information about where this image came from
  // (for example, the file and page that produced it).
  ImageAnnotationContext context = 21;
}

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file.
  repeated AnnotateImageResponse responses = 2;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1 [(google.api.field_behavior) = REQUIRED];
}
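
// Illustrative example (not part of the schema): the JSON body POSTed to
// /v1p2beta1/images:annotate for a single-image batch, assuming the standard
// proto3 JSON mapping (field names rendered in camelCase):
//
//   {
//     "requests": [{
//       "image": {"source": {"imageUri": "gs://my-bucket/images/input.png"}},
//       "features": [{"type": "LABEL_DETECTION", "maxResults": 10}]
//     }]
//   }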

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}
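
// Illustrative example (not part of the schema): an `AsyncAnnotateFileRequest`
// in proto text format for OCR over a stored PDF (URIs are placeholders):
//
//   input_config {
//     gcs_source { uri: "gs://my-bucket/docs/report.pdf" }
//     mime_type: "application/pdf"
//   }
//   features { type: DOCUMENT_TEXT_DETECTION }
//   output_config {
//     gcs_destination { uri: "gs://my-bucket/ocr-output/" }
//     batch_size: 20
//   }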

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1 [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // The type of the file. Currently only "application/pdf" and "image/tiff"
  // are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on GCS.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files, each
  // containing 20 response protos, will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential future
  // support for other output configurations.
  int32 batch_size = 2;
}
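
// Illustrative arithmetic (an assumption, not stated by the schema): with
// `batch_size` = 20, a 101-page PDF would yield ceil(101 / 20) = 6 output
// files, the last holding the single remaining response, if the remainder is
// written to a final smaller file.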

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a GCS
  // object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI where the results will be stored. Results will
  // be in JSON format, and each result will be preceded by its corresponding
  // input URI. This field can either represent a single file, or a prefix for
  // multiple outputs. Prefixes must end in a `/`.
  //
  // Examples:
  //
  // *    File: gs://bucket-name/filename.json
  // *    Prefix: gs://bucket-name/prefix/here/
  // *    File: gs://bucket-name/prefix/here
  //
  // If there are multiple outputs, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse. Multiple outputs can happen if, for example, the
  // output JSON is too large and overflows into multiple sharded files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely that the image belongs to the specified vertical.
  VERY_UNLIKELY = 1;

  // It is unlikely that the image belongs to the specified vertical.
  UNLIKELY = 2;

  // It is possible that the image belongs to the specified vertical.
  POSSIBLE = 3;

  // It is likely that the image belongs to the specified vertical.
  LIKELY = 4;

  // It is very likely that the image belongs to the specified vertical.
  VERY_LIKELY = 5;
}