// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/product_search.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest) returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }
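
  // For illustration only (not part of the generated API surface): a minimal
  // BatchAnnotateImages request body in the proto3 JSON mapping, as sent to
  // the `/v1/images:annotate` binding above. The bucket and object names are
  // placeholders.
  //
  //     {
  //       "requests": [
  //         {
  //           "image": {"source": {"imageUri": "gs://my-bucket/photo.jpg"}},
  //           "features": [{"type": "LABEL_DETECTION", "maxResults": 10}]
  //         }
  //       ]
  //     }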

  // Service that performs image detection and annotation for a batch of files.
  // Currently, only "application/pdf", "image/tiff", and "image/gif" are
  // supported.
  //
  // This service extracts at most 5 frames (GIF) or pages (PDF or TIFF) from
  // each file provided (customers can specify which 5 in
  // AnnotateFileRequest.pages) and performs detection and annotation on each
  // extracted image.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest) returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1/files:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }

  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's GCS bucket, each JSON file containing a
  // BatchAnnotateImagesResponse proto.
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/images:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest) returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely.
  VERY_UNLIKELY = 1;

  // It is unlikely.
  UNLIKELY = 2;

  // It is possible.
  POSSIBLE = 3;

  // It is likely.
  LIKELY = 4;

  // It is very likely.
  VERY_LIKELY = 5;
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding-edge release updated weekly.
  string model = 3;
}
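
// For illustration only: a `Feature` in the proto3 JSON mapping. The values
// are placeholders; `maxResults` is omitted because it does not apply to
// `TEXT_DETECTION`.
//
//     {"type": "TEXT_DETECTION", "model": "builtin/stable"}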

// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if
  //    Google throttles requests to the site for abuse prevention. You should
  //    not depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateImages requests. It does
  // not work for AsyncBatchAnnotateImages requests.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}
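
// For illustration only: the two ways to supply an `Image` in the proto3 JSON
// mapping. The URI and the base64 payload are placeholders.
//
//     {"source": {"imageUri": "gs://my-bucket/photo.jpg"}}
//     {"content": "iVBORw0KGgoAAAANSUhEUg..."}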

// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;

      // Left cheek center.
      LEFT_CHEEK_CENTER = 35;

      // Right cheek center.
      RIGHT_CHEEK_CENTER = 36;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise
  // rotation of the face relative to the image vertical about the axis
  // perpendicular to the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}
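
// For illustration only: an abbreviated `FaceAnnotation` as it might appear
// in a response, in the proto3 JSON mapping. All values are placeholders.
//
//     {
//       "boundingPoly": {"vertices": [{"x": 10, "y": 20}, {"x": 110, "y": 20},
//                                     {"x": 110, "y": 140}, {"x": 10, "y": 140}]},
//       "rollAngle": -3.2,
//       "panAngle": 1.5,
//       "tiltAngle": 0.7,
//       "detectionConfidence": 0.98,
//       "joyLikelihood": "VERY_LIKELY",
//       "surpriseLikelihood": "UNLIKELY"
//     }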

// Detected entity location information.
message LocationInfo {
  // Lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}
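
// For illustration only: an `EntityAnnotation` from label detection in the
// proto3 JSON mapping. The MID, description, and scores are placeholders.
//
//     {
//       "mid": "/m/01yrx",
//       "description": "Cat",
//       "score": 0.98,
//       "topicality": 0.98
//     }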

// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant color detection completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}
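
// For illustration only: requesting crop hints for 4:3 and 16:9 crops via
// `CropHintsParams` in the proto3 JSON mapping.
//
//     {"aspectRatios": [1.33333, 1.77778]}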

// Parameters for web detection request.
message WebDetectionParams {
  // Whether to include results derived from the geo information in the image.
  bool include_geo_results = 2;
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, the Cloud Vision API only includes a confidence score for
  // DOCUMENT_TEXT_DETECTION results. Set this flag to true to include a
  // confidence score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to fine-tune OCR behavior.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is
  // known, setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}
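
// For illustration only: a complete `AnnotateImageRequest` in the proto3 JSON
// mapping, combining the pieces above. The URI and language hint are
// placeholders.
//
//     {
//       "image": {"source": {"imageUri": "gs://my-bucket/scan.png"}},
//       "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
//       "imageContext": {"languageHints": ["en"]}
//     }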

// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information is needed to understand where this
  // image comes from.
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate one single file, e.g. a PDF, TIFF or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file to perform image annotation on.
  //
  // Page numbering starts from 1; the first page of the file is page 1.
  // At most 5 pages are supported per request. Pages can be negative.
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is a GIF instead of a PDF or TIFF, `pages` refers to GIF
  // frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file.
  repeated int32 pages = 4;
}
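
// For illustration only: `"pages": [1, 2, -1]` annotates the first page, the
// second page, and the last page of the file.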

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Currently, only one
  // AnnotateFileRequest per BatchAnnotateFilesRequest is supported.
  repeated AnnotateFileRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 3;
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each
  // AnnotateFileRequest in the BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}
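
// For illustration only: an `AsyncAnnotateFileRequest` in the proto3 JSON
// mapping. The bucket names and prefixes are placeholders.
//
//     {
//       "inputConfig": {
//         "gcsSource": {"uri": "gs://my-bucket/document.pdf"},
//         "mimeType": "application/pdf"
//       },
//       "features": [{"type": "DOCUMENT_TEXT_DETECTION"}],
//       "outputConfig": {
//         "gcsDestination": {"uri": "gs://my-bucket/ocr-output/"},
//         "batchSize": 20
//       }
//     }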

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The maximum number of response protos to put into each output JSON file
  // on Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files, each
  // containing 20 response protos, will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}
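
// For illustration only: an `OutputConfig` in the proto3 JSON mapping that
// writes at most 10 responses per output file. The URI is a placeholder.
//
//     {"gcsDestination": {"uri": "gs://my-bucket/results/"}, "batchSize": 10}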

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can represent either a Cloud Storage file prefix or a
  // Cloud Storage directory. In either case, the URI should be unique,
  // because in order to get all of the output files you will need to do a
  // wildcard Cloud Storage search on the URI prefix you provide.
  //
  // Examples:
  //
  // * File Prefix: gs://bucket-name/here/filenameprefix   The output files
  //   will be created in gs://bucket-name/here/ and the names of the
  //   output files will begin with "filenameprefix".
  //
  // * Directory Prefix: gs://bucket-name/some/location/   The output files
  //   will be created in gs://bucket-name/some/location/ and the names of the
  //   output files could be anything because there was no filename prefix
  //   specified.
  //
  // If there are multiple outputs, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse messages.
  // Multiple outputs can happen if, for example, the output JSON is too large
  // and overflows into multiple sharded files.
  string uri = 1;
}

// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}
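
// For illustration only: a long-running `Operation` carrying this metadata,
// as returned by the `google.longrunning.Operations` interface in the proto3
// JSON mapping. The operation name and timestamps are placeholders.
//
//     {
//       "name": "projects/my-project/locations/us/operations/1234567890",
//       "metadata": {
//         "@type": "type.googleapis.com/google.cloud.vision.v1.OperationMetadata",
//         "state": "RUNNING",
//         "createTime": "2022-01-01T00:00:00Z",
//         "updateTime": "2022-01-01T00:00:05Z"
//       }
//     }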