// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.vision.v1;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/cloud/vision/v1/geometry.proto";
import "google/cloud/vision/v1/product_search.proto";
import "google/cloud/vision/v1/text_annotation.proto";
import "google/cloud/vision/v1/web_detection.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
import "google/type/color.proto";
import "google/type/latlng.proto";

option cc_enable_arenas = true;
option go_package = "cloud.google.com/go/vision/v2/apiv1/visionpb;visionpb";
option java_multiple_files = true;
option java_outer_classname = "ImageAnnotatorProto";
option java_package = "com.google.cloud.vision.v1";
option objc_class_prefix = "GCVN";

// Service that performs Google Cloud Vision API detection tasks over client
// images, such as face, landmark, logo, label, and text detection. The
// ImageAnnotator service returns detected entities from the images.
service ImageAnnotator {
  option (google.api.default_host) = "vision.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform,"
      "https://www.googleapis.com/auth/cloud-vision";

  // Run image detection and annotation for a batch of images.
  rpc BatchAnnotateImages(BatchAnnotateImagesRequest)
      returns (BatchAnnotateImagesResponse) {
    option (google.api.http) = {
      post: "/v1/images:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }

  // Run image detection and annotation for a batch of files. Currently,
  // only "application/pdf", "image/tiff" and "image/gif" are supported.
  //
  // This service will extract at most 5 (customers can specify which 5 in
  // AnnotateFileRequest.pages) frames (gif) or pages (pdf or tiff) from each
  // file provided and perform detection and annotation for each image
  // extracted.
  rpc BatchAnnotateFiles(BatchAnnotateFilesRequest)
      returns (BatchAnnotateFilesResponse) {
    option (google.api.http) = {
      post: "/v1/files:annotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:annotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:annotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
  }

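  // Illustrative only, not part of the API definition: given the HTTP
  // bindings above, a minimal `POST /v1/images:annotate` JSON body might
  // look like the following (proto3 JSON mapping, lowerCamelCase field
  // names; the bucket and object names are placeholders):
  //
  //   {
  //     "requests": [
  //       {
  //         "image": {
  //           "source": { "imageUri": "gs://my-bucket/my-image.jpg" }
  //         },
  //         "features": [
  //           { "type": "LABEL_DETECTION", "maxResults": 5 }
  //         ]
  //       }
  //     ]
  //   }
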
  // Run asynchronous image detection and annotation for a list of images.
  //
  // Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateImagesResponse` (results).
  //
  // This service writes image annotation outputs to JSON files in the
  // customer's GCS bucket, each JSON file containing a
  // BatchAnnotateImagesResponse proto.
  rpc AsyncBatchAnnotateImages(AsyncBatchAnnotateImagesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/images:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/images:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/images:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests,output_config";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateImagesResponse"
      metadata_type: "OperationMetadata"
    };
  }

  // Run asynchronous image detection and annotation for a list of generic
  // files, such as PDF files, which may contain multiple pages and multiple
  // images per page. Progress and results can be retrieved through the
  // `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `OperationMetadata` (metadata).
  // `Operation.response` contains `AsyncBatchAnnotateFilesResponse` (results).
  rpc AsyncBatchAnnotateFiles(AsyncBatchAnnotateFilesRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1/files:asyncBatchAnnotate"
      body: "*"
      additional_bindings {
        post: "/v1/{parent=projects/*/locations/*}/files:asyncBatchAnnotate"
        body: "*"
      }
      additional_bindings {
        post: "/v1/{parent=projects/*}/files:asyncBatchAnnotate"
        body: "*"
      }
    };
    option (google.api.method_signature) = "requests";
    option (google.longrunning.operation_info) = {
      response_type: "AsyncBatchAnnotateFilesResponse"
      metadata_type: "OperationMetadata"
    };
  }
}

// A bucketized representation of likelihood, which is intended to give clients
// highly stable results across model upgrades.
enum Likelihood {
  // Unknown likelihood.
  UNKNOWN = 0;

  // It is very unlikely.
  VERY_UNLIKELY = 1;

  // It is unlikely.
  UNLIKELY = 2;

  // It is possible.
  POSSIBLE = 3;

  // It is likely.
  LIKELY = 4;

  // It is very likely.
  VERY_LIKELY = 5;
}

// The type of Google Cloud Vision API detection to perform, and the maximum
// number of results to return for that type. Multiple `Feature` objects can
// be specified in the `features` list.
message Feature {
  // Type of Google Cloud Vision API feature to be extracted.
  enum Type {
    // Unspecified feature type.
    TYPE_UNSPECIFIED = 0;

    // Run face detection.
    FACE_DETECTION = 1;

    // Run landmark detection.
    LANDMARK_DETECTION = 2;

    // Run logo detection.
    LOGO_DETECTION = 3;

    // Run label detection.
    LABEL_DETECTION = 4;

    // Run text detection / optical character recognition (OCR). Text detection
    // is optimized for areas of text within a larger image; if the image is
    // a document, use `DOCUMENT_TEXT_DETECTION` instead.
    TEXT_DETECTION = 5;

    // Run dense text document OCR. Takes precedence when both
    // `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` are present.
    DOCUMENT_TEXT_DETECTION = 11;

    // Run Safe Search to detect potentially unsafe
    // or undesirable content.
    SAFE_SEARCH_DETECTION = 6;

    // Compute a set of image properties, such as the
    // image's dominant colors.
    IMAGE_PROPERTIES = 7;

    // Run crop hints.
    CROP_HINTS = 9;

    // Run web detection.
    WEB_DETECTION = 10;

    // Run Product Search.
    PRODUCT_SEARCH = 12;

    // Run localizer for object detection.
    OBJECT_LOCALIZATION = 19;
  }

  // The feature type.
  Type type = 1;

  // Maximum number of results of this type. Does not apply to
  // `TEXT_DETECTION`, `DOCUMENT_TEXT_DETECTION`, or `CROP_HINTS`.
  int32 max_results = 2;

  // Model to use for the feature.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest". `DOCUMENT_TEXT_DETECTION` and `TEXT_DETECTION` also
  // support "builtin/weekly" for the bleeding edge release updated weekly.
  string model = 3;
}

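// Illustrative only: in the JSON mapping, a `features` list that pins the
// OCR model revision might be written as below ("builtin/stable" and
// "builtin/latest" are the documented values; the combination shown is
// just an example fragment of a request):
//
//   "features": [
//     { "type": "DOCUMENT_TEXT_DETECTION", "model": "builtin/stable" },
//     { "type": "LABEL_DETECTION", "maxResults": 10 }
//   ]
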
// External image source (Google Cloud Storage or web URL image location).
message ImageSource {
  // **Use `image_uri` instead.**
  //
  // The Google Cloud Storage URI of the form
  // `gs://bucket_name/object_name`. Object versioning is not supported. See
  // [Google Cloud Storage Request
  // URIs](https://cloud.google.com/storage/docs/reference-uris) for more info.
  string gcs_image_uri = 1;

  // The URI of the source image. Can be either:
  //
  // 1. A Google Cloud Storage URI of the form
  //    `gs://bucket_name/object_name`. Object versioning is not supported. See
  //    [Google Cloud Storage Request
  //    URIs](https://cloud.google.com/storage/docs/reference-uris) for more
  //    info.
  //
  // 2. A publicly-accessible image HTTP/HTTPS URL. When fetching images from
  //    HTTP/HTTPS URLs, Google cannot guarantee that the request will be
  //    completed. Your request may fail if the specified host denies the
  //    request (e.g. due to request throttling or DOS prevention), or if
  //    Google throttles requests to the site for abuse prevention. You should
  //    not depend on externally-hosted images for production applications.
  //
  // When both `gcs_image_uri` and `image_uri` are specified, `image_uri` takes
  // precedence.
  string image_uri = 2;
}

// Client image to perform Google Cloud Vision API tasks over.
message Image {
  // Image content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateImages requests. It does
  // not work for AsyncBatchAnnotateImages requests.
  bytes content = 1;

  // Google Cloud Storage image location, or publicly-accessible image
  // URL. If both `content` and `source` are provided for an image, `content`
  // takes precedence and is used to perform the image annotation request.
  ImageSource source = 2;
}

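// Illustrative only: the two ways to supply an image in the JSON mapping.
// If both were set, `content` would take precedence, as noted above. The
// base64 payload and the URL below are placeholders:
//
//   { "content": "/9j/4AAQSkZJRg...base64..." }
//   { "source": { "imageUri": "https://example.com/photo.jpg" } }
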
// A face annotation object contains the results of face detection.
message FaceAnnotation {
  // A face-specific landmark (for example, a face feature).
  message Landmark {
    // Face landmark (feature) type.
    // Left and right are defined from the vantage of the viewer of the image
    // without considering mirror projections typical of photos. So, `LEFT_EYE`,
    // typically, is the person's right eye.
    enum Type {
      // Unknown face landmark detected. Should not be filled.
      UNKNOWN_LANDMARK = 0;

      // Left eye.
      LEFT_EYE = 1;

      // Right eye.
      RIGHT_EYE = 2;

      // Left of left eyebrow.
      LEFT_OF_LEFT_EYEBROW = 3;

      // Right of left eyebrow.
      RIGHT_OF_LEFT_EYEBROW = 4;

      // Left of right eyebrow.
      LEFT_OF_RIGHT_EYEBROW = 5;

      // Right of right eyebrow.
      RIGHT_OF_RIGHT_EYEBROW = 6;

      // Midpoint between eyes.
      MIDPOINT_BETWEEN_EYES = 7;

      // Nose tip.
      NOSE_TIP = 8;

      // Upper lip.
      UPPER_LIP = 9;

      // Lower lip.
      LOWER_LIP = 10;

      // Mouth left.
      MOUTH_LEFT = 11;

      // Mouth right.
      MOUTH_RIGHT = 12;

      // Mouth center.
      MOUTH_CENTER = 13;

      // Nose, bottom right.
      NOSE_BOTTOM_RIGHT = 14;

      // Nose, bottom left.
      NOSE_BOTTOM_LEFT = 15;

      // Nose, bottom center.
      NOSE_BOTTOM_CENTER = 16;

      // Left eye, top boundary.
      LEFT_EYE_TOP_BOUNDARY = 17;

      // Left eye, right corner.
      LEFT_EYE_RIGHT_CORNER = 18;

      // Left eye, bottom boundary.
      LEFT_EYE_BOTTOM_BOUNDARY = 19;

      // Left eye, left corner.
      LEFT_EYE_LEFT_CORNER = 20;

      // Right eye, top boundary.
      RIGHT_EYE_TOP_BOUNDARY = 21;

      // Right eye, right corner.
      RIGHT_EYE_RIGHT_CORNER = 22;

      // Right eye, bottom boundary.
      RIGHT_EYE_BOTTOM_BOUNDARY = 23;

      // Right eye, left corner.
      RIGHT_EYE_LEFT_CORNER = 24;

      // Left eyebrow, upper midpoint.
      LEFT_EYEBROW_UPPER_MIDPOINT = 25;

      // Right eyebrow, upper midpoint.
      RIGHT_EYEBROW_UPPER_MIDPOINT = 26;

      // Left ear tragion.
      LEFT_EAR_TRAGION = 27;

      // Right ear tragion.
      RIGHT_EAR_TRAGION = 28;

      // Left eye pupil.
      LEFT_EYE_PUPIL = 29;

      // Right eye pupil.
      RIGHT_EYE_PUPIL = 30;

      // Forehead glabella.
      FOREHEAD_GLABELLA = 31;

      // Chin gnathion.
      CHIN_GNATHION = 32;

      // Chin left gonion.
      CHIN_LEFT_GONION = 33;

      // Chin right gonion.
      CHIN_RIGHT_GONION = 34;

      // Left cheek center.
      LEFT_CHEEK_CENTER = 35;

      // Right cheek center.
      RIGHT_CHEEK_CENTER = 36;
    }

    // Face landmark type.
    Type type = 3;

    // Face landmark position.
    Position position = 4;
  }

  // The bounding polygon around the face. The coordinates of the bounding box
  // are in the original image's scale.
  // The bounding box is computed to "frame" the face in accordance with human
  // expectations. It is based on the landmarker results.
  // Note that one or more x and/or y coordinates may not be generated in the
  // `BoundingPoly` (the polygon will be unbounded) if only a partial face
  // appears in the image to be annotated.
  BoundingPoly bounding_poly = 1;

  // The `fd_bounding_poly` bounding polygon is tighter than the
  // `boundingPoly`, and encloses only the skin part of the face. Typically, it
  // is used to eliminate the face from any image analysis that detects the
  // "amount of skin" visible in an image. It is not based on the
  // landmarker results, only on the initial face detection, hence
  // the <code>fd</code> (face detection) prefix.
  BoundingPoly fd_bounding_poly = 2;

  // Detected face landmarks.
  repeated Landmark landmarks = 3;

  // Roll angle, which indicates the amount of clockwise/anti-clockwise
  // rotation of the face relative to the image vertical about the axis
  // perpendicular to the face. Range [-180,180].
  float roll_angle = 4;

  // Yaw angle, which indicates the leftward/rightward angle that the face is
  // pointing relative to the vertical plane perpendicular to the image. Range
  // [-180,180].
  float pan_angle = 5;

  // Pitch angle, which indicates the upwards/downwards angle that the face is
  // pointing relative to the image's horizontal plane. Range [-180,180].
  float tilt_angle = 6;

  // Detection confidence. Range [0, 1].
  float detection_confidence = 7;

  // Face landmarking confidence. Range [0, 1].
  float landmarking_confidence = 8;

  // Joy likelihood.
  Likelihood joy_likelihood = 9;

  // Sorrow likelihood.
  Likelihood sorrow_likelihood = 10;

  // Anger likelihood.
  Likelihood anger_likelihood = 11;

  // Surprise likelihood.
  Likelihood surprise_likelihood = 12;

  // Under-exposed likelihood.
  Likelihood under_exposed_likelihood = 13;

  // Blurred likelihood.
  Likelihood blurred_likelihood = 14;

  // Headwear likelihood.
  Likelihood headwear_likelihood = 15;
}

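// Illustrative only: a trimmed `faceAnnotations` element as it might appear
// in a JSON response (values are made up; `Likelihood` values serialize as
// their enum names):
//
//   {
//     "rollAngle": -2.5,
//     "panAngle": 11.0,
//     "tiltAngle": 0.8,
//     "detectionConfidence": 0.97,
//     "joyLikelihood": "VERY_LIKELY",
//     "angerLikelihood": "VERY_UNLIKELY",
//     "headwearLikelihood": "UNLIKELY"
//   }
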
// Detected entity location information.
message LocationInfo {
  // lat/long location coordinates.
  google.type.LatLng lat_lng = 1;
}

// A `Property` consists of a user-supplied name/value pair.
message Property {
  // Name of the property.
  string name = 1;

  // Value of the property.
  string value = 2;

  // Value of numeric properties.
  uint64 uint64_value = 3;
}

// Set of detected entity features.
message EntityAnnotation {
  // Opaque entity ID. Some IDs may be available in
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string mid = 1;

  // The language code for the locale in which the entity textual
  // `description` is expressed.
  string locale = 2;

  // Entity textual description, expressed in its `locale` language.
  string description = 3;

  // Overall score of the result. Range [0, 1].
  float score = 4;

  // **Deprecated. Use `score` instead.**
  // The accuracy of the entity detection in an image.
  // For example, for an image in which the "Eiffel Tower" entity is detected,
  // this field represents the confidence that there is a tower in the query
  // image. Range [0, 1].
  float confidence = 5 [deprecated = true];

  // The relevancy of the ICA (Image Content Annotation) label to the
  // image. For example, the relevancy of "tower" is likely higher to an image
  // containing the detected "Eiffel Tower" than to an image containing a
  // detected distant towering building, even though the confidence that
  // there is a tower in each image may be the same. Range [0, 1].
  float topicality = 6;

  // Image region to which this entity belongs. Not produced
  // for `LABEL_DETECTION` features.
  BoundingPoly bounding_poly = 7;

  // The location information for the detected entity. Multiple
  // `LocationInfo` elements can be present because one location may
  // indicate the location of the scene in the image, and another location
  // may indicate the location of the place where the image was taken.
  // Location information is usually present for landmarks.
  repeated LocationInfo locations = 8;

  // Some entities may have optional user-supplied `Property` (name/value)
  // fields, such as a score or string that qualifies the entity.
  repeated Property properties = 9;
}

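// Illustrative only: a typical `labelAnnotations` element in a JSON
// response (the values are made up and the mid is a placeholder):
//
//   {
//     "mid": "/m/01yrx",
//     "description": "Cat",
//     "score": 0.98,
//     "topicality": 0.98
//   }
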
// Set of detected objects with bounding boxes.
message LocalizedObjectAnnotation {
  // Object ID that should align with EntityAnnotation mid.
  string mid = 1;

  // The BCP-47 language code, such as "en-US" or "sr-Latn". For more
  // information, see
  // http://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
  string language_code = 2;

  // Object name, expressed in its `language_code` language.
  string name = 3;

  // Score of the result. Range [0, 1].
  float score = 4;

  // Image region to which this object belongs. This must be populated.
  BoundingPoly bounding_poly = 5;
}

// Set of features pertaining to the image, computed by computer vision
// methods over safe-search verticals (for example, adult, spoof, medical,
// violence).
message SafeSearchAnnotation {
  // Represents the adult content likelihood for the image. Adult content may
  // contain elements such as nudity, pornographic images or cartoons, or
  // sexual activities.
  Likelihood adult = 1;

  // Spoof likelihood. The likelihood that a modification
  // was made to the image's canonical version to make it appear
  // funny or offensive.
  Likelihood spoof = 2;

  // Likelihood that this is a medical image.
  Likelihood medical = 3;

  // Likelihood that this image contains violent content. Violent content may
  // include death, serious harm, or injury to individuals or groups of
  // individuals.
  Likelihood violence = 4;

  // Likelihood that the request image contains racy content. Racy content may
  // include (but is not limited to) skimpy or sheer clothing, strategically
  // covered nudity, lewd or provocative poses, or close-ups of sensitive
  // body areas.
  Likelihood racy = 9;
}

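// Illustrative only: a `safeSearchAnnotation` in a JSON response uses the
// bucketized `Likelihood` names directly (the values shown are made up):
//
//   "safeSearchAnnotation": {
//     "adult": "VERY_UNLIKELY",
//     "spoof": "UNLIKELY",
//     "medical": "VERY_UNLIKELY",
//     "violence": "UNLIKELY",
//     "racy": "POSSIBLE"
//   }
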
// Rectangle determined by min and max `LatLng` pairs.
message LatLongRect {
  // Min lat/long pair.
  google.type.LatLng min_lat_lng = 1;

  // Max lat/long pair.
  google.type.LatLng max_lat_lng = 2;
}

// Color information consists of RGB channels, score, and the fraction of
// the image that the color occupies in the image.
message ColorInfo {
  // RGB components of the color.
  google.type.Color color = 1;

  // Image-specific score for this color. Value in range [0, 1].
  float score = 2;

  // The fraction of pixels the color occupies in the image.
  // Value in range [0, 1].
  float pixel_fraction = 3;
}

// Set of dominant colors and their corresponding scores.
message DominantColorsAnnotation {
  // RGB color values with their score and pixel fraction.
  repeated ColorInfo colors = 1;
}

// Stores image properties, such as dominant colors.
message ImageProperties {
  // If present, dominant colors completed successfully.
  DominantColorsAnnotation dominant_colors = 1;
}

// Single crop hint that is used to generate a new crop when serving an image.
message CropHint {
  // The bounding polygon for the crop region. The coordinates of the bounding
  // box are in the original image's scale.
  BoundingPoly bounding_poly = 1;

  // Confidence of this being a salient region. Range [0, 1].
  float confidence = 2;

  // Fraction of importance of this salient region with respect to the original
  // image.
  float importance_fraction = 3;
}

// Set of crop hints that are used to generate new crops when serving images.
message CropHintsAnnotation {
  // Crop hint results.
  repeated CropHint crop_hints = 1;
}

// Parameters for crop hints annotation request.
message CropHintsParams {
  // Aspect ratios in floats, representing the ratio of the width to the height
  // of the image. For example, if the desired aspect ratio is 4/3, the
  // corresponding float value should be 1.33333. If not specified, the
  // best possible crop is returned. The number of provided aspect ratios is
  // limited to a maximum of 16; any aspect ratios provided after the 16th are
  // ignored.
  repeated float aspect_ratios = 1;
}

// Parameters for web detection request.
message WebDetectionParams {
  // This field has no effect on results.
  bool include_geo_results = 2 [deprecated = true];
}

// Parameters for text detections. This is used to control TEXT_DETECTION and
// DOCUMENT_TEXT_DETECTION features.
message TextDetectionParams {
  // By default, Cloud Vision API only includes confidence score for
  // DOCUMENT_TEXT_DETECTION result. Set the flag to true to include confidence
  // score for TEXT_DETECTION as well.
  bool enable_text_detection_confidence_score = 9;

  // A list of advanced OCR options to further fine-tune OCR behavior.
  // Current valid values are:
  //
  // - `legacy_layout`: a heuristic layout detection algorithm, which serves
  //   as an alternative to the current ML-based layout detection algorithm.
  //   Customers can choose the most suitable layout algorithm based on their
  //   situation.
  repeated string advanced_ocr_options = 11;
}

// Image context and/or feature-specific parameters.
message ImageContext {
  // Not used.
  LatLongRect lat_long_rect = 1;

  // List of languages to use for TEXT_DETECTION. In most cases, an empty value
  // yields the best results since it enables automatic language detection. For
  // languages based on the Latin alphabet, setting `language_hints` is not
  // needed. In rare cases, when the language of the text in the image is
  // known, setting a hint will help get better results (although it will be a
  // significant hindrance if the hint is wrong). Text detection returns an
  // error if one or more of the specified languages is not one of the
  // [supported languages](https://cloud.google.com/vision/docs/languages).
  repeated string language_hints = 2;

  // Parameters for crop hints annotation request.
  CropHintsParams crop_hints_params = 4;

  // Parameters for product search.
  ProductSearchParams product_search_params = 5;

  // Parameters for web detection.
  WebDetectionParams web_detection_params = 6;

  // Parameters for text detection and document text detection.
  TextDetectionParams text_detection_params = 12;
}

// Request for performing Google Cloud Vision API tasks over a user-provided
// image, with user-requested features, and with context information.
message AnnotateImageRequest {
  // The image to be processed.
  Image image = 1;

  // Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image.
  ImageContext image_context = 3;
}

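// Illustrative only: an `imageContext` fragment combining a language hint
// with crop hints parameters (a 16:9 aspect ratio expressed as the float
// 1.77778):
//
//   "imageContext": {
//     "languageHints": ["en"],
//     "cropHintsParams": { "aspectRatios": [1.77778] }
//   }
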
// If an image was produced from a file (e.g. a PDF), this message gives
// information about the source of that image.
message ImageAnnotationContext {
  // The URI of the file used to produce the image.
  string uri = 1;

  // If the file was a PDF or TIFF, this field gives the page number within
  // the file used to produce the image.
  int32 page_number = 2;
}

// Response to an image annotation request.
message AnnotateImageResponse {
  // If present, face detection has completed successfully.
  repeated FaceAnnotation face_annotations = 1;

  // If present, landmark detection has completed successfully.
  repeated EntityAnnotation landmark_annotations = 2;

  // If present, logo detection has completed successfully.
  repeated EntityAnnotation logo_annotations = 3;

  // If present, label detection has completed successfully.
  repeated EntityAnnotation label_annotations = 4;

  // If present, localized object detection has completed successfully.
  // This will be sorted descending by confidence score.
  repeated LocalizedObjectAnnotation localized_object_annotations = 22;

  // If present, text (OCR) detection has completed successfully.
  repeated EntityAnnotation text_annotations = 5;

  // If present, text (OCR) detection or document (OCR) text detection has
  // completed successfully.
  // This annotation provides the structural hierarchy for the OCR detected
  // text.
  TextAnnotation full_text_annotation = 12;

  // If present, safe-search annotation has completed successfully.
  SafeSearchAnnotation safe_search_annotation = 6;

  // If present, image properties were extracted successfully.
  ImageProperties image_properties_annotation = 8;

  // If present, crop hints have completed successfully.
  CropHintsAnnotation crop_hints_annotation = 11;

  // If present, web detection has completed successfully.
  WebDetection web_detection = 13;

  // If present, product search has completed successfully.
  ProductSearchResults product_search_results = 14;

  // If set, represents the error message for the operation.
  // Note that filled-in image annotations are guaranteed to be
  // correct, even when `error` is set.
  google.rpc.Status error = 9;

  // If present, contextual information is needed to understand where this
  // image comes from.
  ImageAnnotationContext context = 21;
}

// Multiple image annotation requests are batched into a single service call.
message BatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), and can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

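// Illustrative only: a batch request that pins the serving location and
// attaches user-defined labels ("my-project" and the label pair are
// placeholders; the `requests` list is elided):
//
//   {
//     "parent": "projects/my-project/locations/eu",
//     "labels": { "env": "test" },
//     "requests": [ ... ]
//   }
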
// Response to a batch image annotation request.
message BatchAnnotateImagesResponse {
  // Individual responses to image annotation requests within the batch.
  repeated AnnotateImageResponse responses = 1;
}

// A request to annotate a single file, e.g. a PDF, TIFF or GIF file.
message AnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Pages of the file on which to perform image annotation.
  //
  // Pages start from 1; the first page of the file is page 1.
  // At most 5 pages are supported per request. Pages can be negative.
  //
  // Page 1 means the first page.
  // Page 2 means the second page.
  // Page -1 means the last page.
  // Page -2 means the second-to-last page.
  //
  // If the file is a GIF instead of a PDF or TIFF, "pages" refers to GIF
  // frames.
  //
  // If this field is empty, by default the service performs image annotation
  // for the first 5 pages of the file.
  repeated int32 pages = 4;
}

// Response to a single file annotation request. A file may contain one or more
// images, which individually have their own responses.
message AnnotateFileResponse {
  // Information about the file for which this response is generated.
  InputConfig input_config = 1;

  // Individual responses to images found within the file. This field will be
  // empty if the `error` field is set.
  repeated AnnotateImageResponse responses = 2;

  // This field gives the total number of pages in the file.
  int32 total_pages = 3;

  // If set, represents the error message for the failed request. The
  // `responses` field will not be set in this case.
  google.rpc.Status error = 4;
}

// A list of requests to annotate files using the BatchAnnotateFiles API.
message BatchAnnotateFilesRequest {
  // Required. The list of file annotation requests. Right now we support only
  // one AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 3;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), and can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

// A list of file annotation responses.
message BatchAnnotateFilesResponse {
  // The list of file annotation responses, each response corresponding to each
  // AnnotateFileRequest in BatchAnnotateFilesRequest.
  repeated AnnotateFileResponse responses = 1;
}

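// Illustrative only: a `POST /v1/files:annotate` body that OCRs the first,
// second, and last pages of a PDF stored in GCS (bucket and object names
// are placeholders):
//
//   {
//     "requests": [
//       {
//         "inputConfig": {
//           "gcsSource": { "uri": "gs://my-bucket/report.pdf" },
//           "mimeType": "application/pdf"
//         },
//         "features": [ { "type": "DOCUMENT_TEXT_DETECTION" } ],
//         "pages": [1, 2, -1]
//       }
//     ]
//   }
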
// An offline file annotation request.
message AsyncAnnotateFileRequest {
  // Required. Information about the input file.
  InputConfig input_config = 1;

  // Required. Requested features.
  repeated Feature features = 2;

  // Additional context that may accompany the image(s) in the file.
  ImageContext image_context = 3;

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 4;
}

// The response for a single offline file annotation request.
message AsyncAnnotateFileResponse {
  // The output location and metadata from AsyncAnnotateFileRequest.
  OutputConfig output_config = 1;
}

// Request for async image annotation for a list of images.
message AsyncBatchAnnotateImagesRequest {
  // Required. Individual image annotation requests for this batch.
  repeated AnnotateImageRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Required. The desired output location and metadata (e.g. format).
  OutputConfig output_config = 2 [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), and can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Response to an async batch image annotation request.
message AsyncBatchAnnotateImagesResponse {
  // The output location and metadata from AsyncBatchAnnotateImagesRequest.
  OutputConfig output_config = 1;
}

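// Illustrative only: the `outputConfig` portion of an
// `images:asyncBatchAnnotate` request. With `batchSize` 10, every output
// JSON file holds at most 10 response protos (the bucket name is a
// placeholder):
//
//   "outputConfig": {
//     "gcsDestination": { "uri": "gs://my-bucket/vision-output/" },
//     "batchSize": 10
//   }
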
// Multiple async file annotation requests are batched into a single service
// call.
message AsyncBatchAnnotateFilesRequest {
  // Required. Individual async file annotation requests for this batch.
  repeated AsyncAnnotateFileRequest requests = 1
      [(google.api.field_behavior) = REQUIRED];

  // Optional. Target project and location to make a call.
  //
  // Format: `projects/{project-id}/locations/{location-id}`.
  //
  // If no parent is specified, a region will be chosen automatically.
  //
  // Supported location-ids:
  //     `us`: USA country only,
  //     `asia`: East Asia areas, like Japan, Taiwan,
  //     `eu`: The European Union.
  //
  // Example: `projects/project-A/locations/eu`.
  string parent = 4;

  // Optional. The labels with user-defined metadata for the request.
  //
  // Label keys and values can be no longer than 63 characters
  // (Unicode codepoints), and can only contain lowercase letters, numeric
  // characters, underscores and dashes. International characters are allowed.
  // Label values are optional. Label keys must start with a letter.
  map<string, string> labels = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Response to an async batch file annotation request.
message AsyncBatchAnnotateFilesResponse {
  // The list of file annotation responses, one for each request in
  // AsyncBatchAnnotateFilesRequest.
  repeated AsyncAnnotateFileResponse responses = 1;
}

// The desired input location and metadata.
message InputConfig {
  // The Google Cloud Storage location to read the input from.
  GcsSource gcs_source = 1;

  // File content, represented as a stream of bytes.
  // Note: As with all `bytes` fields, protocol buffers use a pure binary
  // representation, whereas JSON representations use base64.
  //
  // Currently, this field only works for BatchAnnotateFiles requests. It does
  // not work for AsyncBatchAnnotateFiles requests.
  bytes content = 3;

  // The type of the file. Currently only "application/pdf", "image/tiff" and
  // "image/gif" are supported. Wildcards are not supported.
  string mime_type = 2;
}

// The desired output location and metadata.
message OutputConfig {
  // The Google Cloud Storage location to write the output(s) to.
  GcsDestination gcs_destination = 1;

  // The max number of response protos to put into each output JSON file on
  // Google Cloud Storage.
  // The valid range is [1, 100]. If not specified, the default value is 20.
  //
  // For example, for one PDF file with 100 pages, 100 response protos will
  // be generated. If `batch_size` = 20, then 5 JSON files, each containing
  // 20 response protos, will be written under the prefix
  // `gcs_destination`.`uri`.
  //
  // Currently, batch_size only applies to GcsDestination, with potential
  // future support for other output configurations.
  int32 batch_size = 2;
}

// The Google Cloud Storage location where the input will be read from.
message GcsSource {
  // Google Cloud Storage URI for the input file. This must only be a
  // Google Cloud Storage object. Wildcards are not currently supported.
  string uri = 1;
}

// The Google Cloud Storage location where the output will be written to.
message GcsDestination {
  // Google Cloud Storage URI prefix where the results will be stored. Results
  // will be in JSON format and preceded by their corresponding input URI
  // prefix. This field can either represent a GCS file prefix or a GCS
  // directory. In either case, the URI should be unique, because to get all
  // of the output files you will need to do a wildcard GCS search on the URI
  // prefix you provide.
  //
  // Examples:
  //
  // *    File Prefix: gs://bucket-name/here/filenameprefix   The output files
  //      will be created in gs://bucket-name/here/ and the names of the
  //      output files will begin with "filenameprefix".
  //
  // *    Directory Prefix: gs://bucket-name/some/location/   The output files
  //      will be created in gs://bucket-name/some/location/ and the names of
  //      the output files could be anything because there was no filename
  //      prefix specified.
  //
  // If there are multiple outputs, each response is still an
  // AnnotateFileResponse, each of which contains some subset of the full list
  // of AnnotateImageResponse. Multiple outputs can happen if, for example, the
  // output JSON is too large and overflows into multiple sharded files.
  string uri = 1;
}

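// Illustrative only: applying the `batch_size` arithmetic above, a 100-page
// PDF with `batchSize` 20 yields ceil(100 / 20) = 5 sharded JSON files under
// the prefix, which can then be listed with a wildcard search such as
// `gsutil ls "gs://bucket-name/here/filenameprefix*"`.
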
// Contains metadata for the BatchAnnotateImages operation.
message OperationMetadata {
  // Batch operation states.
  enum State {
    // Invalid.
    STATE_UNSPECIFIED = 0;

    // Request is received.
    CREATED = 1;

    // Request is actively being processed.
    RUNNING = 2;

    // The batch processing is done.
    DONE = 3;

    // The batch processing was cancelled.
    CANCELLED = 4;
  }

  // Current state of the batch operation.
  State state = 1;

  // The time when the batch request was received.
  google.protobuf.Timestamp create_time = 5;

  // The time when the operation result was last updated.
  google.protobuf.Timestamp update_time = 6;
}

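// Illustrative only: while an async batch is in flight, polling the
// long-running operation (standard `google.longrunning` JSON mapping) might
// return something like the following; the operation name and timestamps
// are placeholders:
//
//   {
//     "name": "projects/my-project/operations/abc123",
//     "metadata": {
//       "@type": "type.googleapis.com/google.cloud.vision.v1.OperationMetadata",
//       "state": "RUNNING",
//       "createTime": "2023-01-01T00:00:00Z",
//       "updateTime": "2023-01-01T00:00:30Z"
//     },
//     "done": false
//   }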