// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1p4beta1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1p4beta1/face.proto";
import "google/cloud/vision/v1p4beta1/geometry.proto";
import "google/cloud/vision/v1p4beta1/product_search.proto";
import "google/cloud/vision/v1p4beta1/text_annotation.proto";
import "google/cloud/vision/v1p4beta1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/apiv1p4beta1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1p4beta1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1p4beta1/images:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run image detection and annotation for a batch of files. Currently, only
  // "application/pdf", "image/tiff", and "image/gif" are supported.
  //
  // This service extracts at most 5 frames (GIF) or pages (PDF or TIFF) from
  // each file provided (customers can specify which 5 in
  // AnnotateFileRequest.pages) and performs detection and annotation on each
  // extracted image.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1p4beta1/files:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
  }

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse`
  // (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's GCS bucket, each JSON file containing a
  // BatchAnnotateImagesResponse proto.
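  //
  // As an illustrative sketch only (the shard naming below is an assumption,
  // not part of this proto's contract): with
  // `output_config.gcs_destination.uri` set to `gs://my-bucket/results/` and
  // `batch_size` left at its default of 20, annotating 30 images might
  // produce:
  //
  //     gs://my-bucket/results/output-1-to-20.json
  //     gs://my-bucket/results/output-21-to-30.json
  //
  // where each JSON file contains one BatchAnnotateImagesResponse proto.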
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p4beta1/images:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1p4beta1/files:asyncBatchAnnotate"
      body: "*"
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}
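// A minimal sketch of how `Feature` entries are populated in a request
// (textproto; the values are illustrative, not defaults):
//
//     features { type: LABEL_DETECTION max_results: 10 }
//     features { type: DOCUMENT_TEXT_DETECTION model: "builtin/stable" }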
// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if
  //    Google throttles requests to the site for abuse prevention. You should
  //    not depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
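// A sketch of the two ways to supply an `Image` (textproto; the URI is
// illustrative):
//
//     image { content: "<raw image bytes; base64 in JSON>" }
//     image { source { image_uri: "gs://my-bucket/photo.jpg" } }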
// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely.
  VERY_UNLIKELY = 1;

  // It is unlikely.
  UNLIKELY = 2;

  // It is possible.
  POSSIBLE = 3;

  // It is likely.
  LIKELY = 4;

  // It is very likely.
  VERY_LIKELY = 5;
}

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise
  // rotation of the face relative to the image vertical about the axis
  // perpendicular to the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;

  // Additional recognition information. Only computed if
  // image_context.face_recognition_params is provided, **and** a match is
  // found to a [Celebrity][google.cloud.vision.v1p4beta1.Celebrity] in the
  // input [CelebritySet][google.cloud.vision.v1p4beta1.CelebritySet]. This
  // field is sorted in order of decreasing confidence values.
  repeated FaceRecognitionResult recognition_result = 16;
}

// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
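// An illustrative `EntityAnnotation` as it might appear among
// `label_annotations` in a response (textproto; all values are made up):
//
//     label_annotations {
//       mid: "/m/01bqvp"
//       description: "Sky"
//       score: 0.97
//       topicality: 0.97
//     }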
// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
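// Example: requesting crop hints near 4:3 and 16:9, following the
// width-to-height rule above (textproto; 4/3 -> 1.33333, 16/9 -> 1.77778):
//
//     crop_hints_params {
//       aspect_ratios: 1.33333
//       aspect_ratios: 1.77778
//     }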
// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detection. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, the Cloud Vision API includes a confidence score only for
  // DOCUMENT_TEXT_DETECTION results. Set this flag to true to include a
  // confidence score for TEXT_DETECTION results as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is
  // known, setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for face recognition.
  FaceRecognitionParams face_recognition_params = 10;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}
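// A sketch of one complete `AnnotateImageRequest` inside a batch (textproto;
// the URI and hint values are illustrative):
//
//     requests {
//       image { source { image_uri: "gs://my-bucket/menu.jpg" } }
//       features { type: TEXT_DETECTION }
//       features { type: LABEL_DETECTION max_results: 5 }
//       image_context { language_hints: "en" }
//     }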
// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information needed to understand where this image
  // comes from.
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate a single file, e.g. a PDF, TIFF, or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file to perform image annotation on.
  //
  // Pages start from 1; the first page of the file is page 1.
  // At most 5 pages are supported per request. Pages can be negative.
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is a GIF instead of a PDF or TIFF, "page" refers to GIF
  // frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file. (See the illustrative example
  // following this message.)
  repeated int32 pages = 4;
}
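// Example: selecting the first two pages and the last page of a file within
// an `AnnotateFileRequest` (textproto), per the `pages` semantics above:
//
//     pages: 1
//     pages: 2
//     pages: -1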
// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Currently, only one
  // AnnotateFileRequest per BatchAnnotateFilesRequest is supported.
  repeated AnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, one response for each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}
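// Example `InputConfig` for a PDF stored in GCS (textproto; the bucket and
// object names are illustrative):
//
//     input_config {
//       gcs_source { uri: "gs://my-bucket/invoices/2019-03.pdf" }
//       mime_type: "application/pdf"
//     }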
// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files, each
  // containing 20 response protos, will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can represent either a GCS file prefix or a GCS
  // directory. In either case, the URI should be unique, because in order to
  // get all of the output files, you will need to do a wildcard GCS search on
  // the URI prefix you provide.
  //
  // Examples:
  //
  // * File prefix: `gs://bucket-name/here/filenameprefix`. The output files
  //   will be created in gs://bucket-name/here/ and the names of the
  //   output files will begin with "filenameprefix".
  //
  // * Directory prefix: `gs://bucket-name/some/location/`. The output files
  //   will be created in gs://bucket-name/some/location/ and the names of the
  //   output files could be anything because there was no filename prefix
  //   specified.
  //
  // If there are multiple outputs, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse messages. Multiple outputs can happen if, for
  // example, the output JSON is too large and overflows into multiple sharded
  // files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}