// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.visionai.v1;

import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.VisionAI.V1";
option go_package = "cloud.google.com/go/visionai/apiv1/visionaipb;visionaipb";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.cloud.visionai.v1";
option php_namespace = "Google\\Cloud\\VisionAI\\V1";
option ruby_package = "Google::Cloud::VisionAI::V1";

// Enum describing all possible types of a stream annotation.
enum StreamAnnotationType {
  // Type UNSPECIFIED.
  STREAM_ANNOTATION_TYPE_UNSPECIFIED = 0;

  // An active_zone annotation defines a polygon on top of the content from an
  // image/video based stream. Subsequent processing will only focus on the
  // content inside the active zone.
  STREAM_ANNOTATION_TYPE_ACTIVE_ZONE = 1;

  // A crossing_line annotation defines a polyline on top of the content from
  // an image/video based Vision AI stream. Events happening across the line
  // will be captured, for example, the counts of people who go across the
  // line in the Occupancy Analytics Processor.
  STREAM_ANNOTATION_TYPE_CROSSING_LINE = 2;
}

// Output format for the Personal Protective Equipment Detection Operator.
message PersonalProtectiveEquipmentDetectionOutput {
  // The entity info for annotations from the person detection prediction
  // result.
  message PersonEntity {
    // Entity id.
    int64 person_entity_id = 1;
  }

  // The entity info for annotations from the PPE detection prediction result.
  message PPEEntity {
    // Label id.
    int64 ppe_label_id = 1;

    // Human readable string of the label (examples: helmet, glove, mask).
    string ppe_label_string = 2;

    // Human readable string of the super category label (examples:
    // head_cover, hands_cover, face_cover).
    string ppe_supercategory_label_string = 3;

    // Entity id.
    int64 ppe_entity_id = 4;
  }

  // Bounding box in the normalized coordinates.
  message NormalizedBoundingBox {
    // Min in x coordinate.
    float xmin = 1;

    // Min in y coordinate.
    float ymin = 2;

    // Width of the bounding box.
    float width = 3;

    // Height of the bounding box.
    float height = 4;
  }

  // PersonIdentifiedBox contains the location and the entity info of the
  // person.
  message PersonIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Person entity info.
    PersonEntity person_entity = 4;
  }
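  // Illustrative only (the values below are assumptions, not tied to any
  // particular model): a PersonIdentifiedBox in textproto form. The box's
  // top-left corner sits at 10% of the frame width/height and it spans
  // 20% x 50% of the frame:
  //
  //   box_id: 7
  //   normalized_bounding_box { xmin: 0.1 ymin: 0.1 width: 0.2 height: 0.5 }
  //   confidence_score: 0.92
  //   person_entity { person_entity_id: 3 }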
  // PPEIdentifiedBox contains the location and the entity info of the PPE.
  message PPEIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // PPE entity info.
    PPEEntity ppe_entity = 4;
  }

  // DetectedPerson contains the detected person, their associated PPE, and
  // the corresponding protection information.
  message DetectedPerson {
    // The id of the detected person.
    int64 person_id = 1;

    // The info of the detected person's identified box.
    PersonIdentifiedBox detected_person_identified_box = 2;

    // The info of the PPE identified boxes associated with the detected
    // person.
    repeated PPEIdentifiedBox detected_ppe_identified_boxes = 3;

    // Coverage score for each body part.
    // Coverage score for face.
    optional float face_coverage_score = 4;

    // Coverage score for eyes.
    optional float eyes_coverage_score = 5;

    // Coverage score for head.
    optional float head_coverage_score = 6;

    // Coverage score for hands.
    optional float hands_coverage_score = 7;

    // Coverage score for body.
    optional float body_coverage_score = 8;

    // Coverage score for feet.
    optional float feet_coverage_score = 9;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of DetectedPersons.
  repeated DetectedPerson detected_persons = 2;
}

// Prediction output format for Generic Object Detection.
message ObjectDetectionPredictionResult {
  // The entity info for annotations from the object detection prediction
  // result.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // IdentifiedBox contains the location and the entity of the object.
  message IdentifiedBox {
    // Bounding box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Entity of this box.
    Entity entity = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;
}
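// Illustrative only (the labels and values below are assumptions): an
// ObjectDetectionPredictionResult in textproto form with a single identified
// box:
//
//   current_time { seconds: 1700000000 }
//   identified_boxes {
//     box_id: 1
//     normalized_bounding_box { xmin: 0.25 ymin: 0.4 width: 0.1 height: 0.3 }
//     confidence_score: 0.88
//     entity { label_id: 5 label_string: "car" }
//   }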
// Prediction output format for Image Object Detection.
message ImageObjectDetectionPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified,
  // ordered by confidence score in descending order. These are the id
  // segments instead of the full resource names.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified.
  // Order matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in the correctness of the predicted IDs; a
  // higher value means higher confidence. Order matches the IDs.
  repeated float confidences = 3;

  // Bounding boxes, i.e. the rectangles over the image, that pinpoint
  // the found AnnotationSpecs. Given in order that matches the IDs. Each
  // bounding box is an array of 4 numbers `xMin`, `xMax`, `yMin`, and
  // `yMax`, which represent the extremal coordinates of the box. They are
  // relative to the image size, and the point 0,0 is in the top left
  // of the image.
  repeated google.protobuf.ListValue bboxes = 4;
}

// Prediction output format for Image and Text Classification.
message ClassificationPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified.
  // Order matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in the correctness of the predicted IDs; a
  // higher value means higher confidence. Order matches the IDs.
  repeated float confidences = 3;
}

// Prediction output format for Image Segmentation.
message ImageSegmentationPredictionResult {
  // A PNG image where each pixel in the mask represents the category to
  // which the pixel in the original image was predicted to belong. The size
  // of this image will be the same as the original image. The mapping
  // between the AnnotationSpec and the color can be found in the model's
  // metadata. The model will choose the most likely category and if none of
  // the categories reach the confidence threshold, the pixel will be marked
  // as background.
  string category_mask = 1;

  // A one-channel image which is encoded as an 8-bit lossless PNG. The size
  // of the image will be the same as the original image. For a specific
  // pixel, a darker color means less confidence in the correctness of the
  // category in the categoryMask for the corresponding pixel. Black means no
  // confidence and white means complete confidence.
  string confidence_mask = 2;
}

// Prediction output format for Video Action Recognition.
message VideoActionRecognitionPredictionResult {
  // Each IdentifiedAction is one particular identification of an action
  // specified with the AnnotationSpec id, display_name and the associated
  // confidence score.
  message IdentifiedAction {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // actions have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the actions
  // have been identified. In particular, if the end is the same as the
  // start, it means the identification happens on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the actions identified in the time range.
  repeated IdentifiedAction actions = 3;
}
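// Illustrative only (the ids and scores below are assumptions): a
// VideoActionRecognitionPredictionResult in textproto form covering a
// two-second segment:
//
//   segment_start_time { seconds: 10 }
//   segment_end_time { seconds: 12 }
//   actions { id: "456" display_name: "running" confidence: 0.83 }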
// Prediction output format for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
  // Bounding box for the detected object, i.e. the rectangle over the video
  // frame pinpointing the found AnnotationSpec. The coordinates are relative
  // to the frame size, and the point 0,0 is in the top left of the frame.
  message BoundingBox {
    // The leftmost coordinate of the bounding box.
    float x_min = 1;

    // The rightmost coordinate of the bounding box.
    float x_max = 2;

    // The topmost coordinate of the bounding box.
    float y_min = 3;

    // The bottommost coordinate of the bounding box.
    float y_max = 4;
  }

  // Each DetectedObject is one particular identification of an object
  // specified with the AnnotationSpec id and display_name, the bounding box,
  // the associated confidence score and the corresponding track_id.
  message DetectedObject {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // Bounding box.
    BoundingBox bounding_box = 3;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 4;

    // The same object may be identified on multiple frames which are
    // typically adjacent. The set of frames where a particular object has
    // been detected forms a track. This track_id can be used to trace down
    // all frames for a detected object.
    int64 track_id = 5;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // current identifications happen.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the current
  // identifications happen. In particular, if the end is the same as the
  // start, it means the identifications happen on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the objects detected in the specified time range.
  repeated DetectedObject objects = 3;
}

// Prediction output format for Video Classification.
message VideoClassificationPredictionResult {
  // Each IdentifiedClassification is one particular identification of a
  // classification specified with the AnnotationSpec id and display_name,
  // and the associated confidence score.
  message IdentifiedClassification {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // classifications have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the
  // classifications have been identified. In particular, if the end is the
  // same as the start, it means the identification happens on a specific
  // video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the classifications identified in the time range.
  repeated IdentifiedClassification classifications = 3;
}
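// Illustrative only (the ids and scores below are assumptions): a
// VideoClassificationPredictionResult in textproto form where the segment
// start and end coincide, i.e. the classification applies to a single video
// frame:
//
//   segment_start_time { seconds: 100 }
//   segment_end_time { seconds: 100 }
//   classifications { id: "123" display_name: "daytime" confidence: 0.97 }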
// The prediction result proto for occupancy counting.
message OccupancyCountingPredictionResult {
  // The entity info for annotations from the occupancy counting operator.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // IdentifiedBox contains the location and the entity of the object.
  message IdentifiedBox {
    // Bounding box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float score = 3;

    // Entity of this box.
    Entity entity = 4;

    // A unique id to identify a track. It should be consistent across
    // frames. It only exists if tracking is enabled.
    int64 track_id = 5;
  }

  // The statistics info for annotations from the occupancy counting
  // operator.
  message Stats {
    // The object info and instant count for annotations from the occupancy
    // counting operator.
    message ObjectCount {
      // Entity of this object.
      Entity entity = 1;

      // Count of the object.
      int32 count = 2;
    }

    // The object info and accumulated count for annotations from the
    // occupancy counting operator.
    message AccumulatedObjectCount {
      // The start time of the accumulated count.
      google.protobuf.Timestamp start_time = 1;

      // The object count for the accumulated count.
      ObjectCount object_count = 2;
    }

    // Message for the crossing line count.
    message CrossingLineCount {
      // Line annotation from the user.
      StreamAnnotation annotation = 1;

      // The direction that follows the right hand rule.
      repeated ObjectCount positive_direction_counts = 2;

      // The direction that is opposite to the right hand rule.
      repeated ObjectCount negative_direction_counts = 3;

      // The accumulated positive count.
      repeated AccumulatedObjectCount accumulated_positive_direction_counts = 4;

      // The accumulated negative count.
      repeated AccumulatedObjectCount accumulated_negative_direction_counts = 5;
    }
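    // Illustrative only (the entities and counts below are assumptions): a
    // CrossingLineCount in textproto form. Crossings following the right
    // hand rule relative to the annotated polyline are tallied as positive,
    // and the opposite crossings as negative:
    //
    //   annotation { id: "line-1" type: STREAM_ANNOTATION_TYPE_CROSSING_LINE }
    //   positive_direction_counts { entity { label_string: "person" } count: 3 }
    //   negative_direction_counts { entity { label_string: "person" } count: 1 }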
    // Message for the active zone count.
    message ActiveZoneCount {
      // Active zone annotation from the user.
      StreamAnnotation annotation = 1;

      // Counts in the zone.
      repeated ObjectCount counts = 2;
    }

    // Counts of the full frame.
    repeated ObjectCount full_frame_count = 1;

    // Crossing line counts.
    repeated CrossingLineCount crossing_line_counts = 2;

    // Active zone counts.
    repeated ActiveZoneCount active_zone_counts = 3;
  }

  // The track info for annotations from the occupancy counting operator.
  message TrackInfo {
    // A unique id to identify a track. It should be consistent across
    // frames.
    string track_id = 1;

    // Start timestamp of this track.
    google.protobuf.Timestamp start_time = 2;
  }

  // The dwell time info for annotations from the occupancy counting
  // operator.
  message DwellTimeInfo {
    // A unique id to identify a track. It should be consistent across
    // frames.
    string track_id = 1;

    // The unique id for the zone in which the object is dwelling/waiting.
    string zone_id = 2;

    // The beginning time when a dwelling object has been identified in a
    // zone.
    google.protobuf.Timestamp dwell_start_time = 3;

    // The end time when a dwelling object has exited a zone.
    google.protobuf.Timestamp dwell_end_time = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;

  // Detection statistics.
  Stats stats = 3;

  // Track related information. All the tracks that are live at this
  // timestamp. It only exists if tracking is enabled.
  repeated TrackInfo track_info = 4;

  // Dwell time related information. All the tracks that are live in a given
  // zone with a start and end dwell time timestamp.
  repeated DwellTimeInfo dwell_time_info = 5;

  // The presentation timestamp of the frame.
  optional int64 pts = 6;
}

// Message describing annotations of a Vision AI stream resource.
message StreamAnnotation {
  oneof annotation_payload {
    // Annotation for type ACTIVE_ZONE.
    NormalizedPolygon active_zone = 5;

    // Annotation for type CROSSING_LINE.
    NormalizedPolyline crossing_line = 6;
  }

  // ID of the annotation. It must be unique when used in a certain context.
  // For example, all the annotations to one input stream of a Vision AI
  // application.
  string id = 1;

  // User-friendly name for the annotation.
  string display_name = 2;

  // The Vision AI stream resource name.
  string source_stream = 3;

  // The actual type of Annotation.
  StreamAnnotationType type = 4;
}

// A wrapper of repeated StreamAnnotation.
message StreamAnnotations {
  // Multiple annotations.
  repeated StreamAnnotation stream_annotations = 1;
}

// Normalized Polygon.
message NormalizedPolygon {
  // The bounding polygon normalized vertices. The top left corner of the
  // image will be [0, 0].
  repeated NormalizedVertex normalized_vertices = 1;
}

// Normalized Polyline, which represents a curve consisting of connected
// straight-line segments.
message NormalizedPolyline {
  // A sequence of vertices connected by straight lines.
  repeated NormalizedVertex normalized_vertices = 1;
}

// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
  // X coordinate.
  float x = 1;

  // Y coordinate.
  float y = 2;
}
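// Illustrative only (the id, display name, coordinates, and stream resource
// name below are assumptions): a StreamAnnotation in textproto form defining
// a triangular active zone over the upper-left part of the frame:
//
//   id: "zone-1"
//   display_name: "loading dock"
//   source_stream: "projects/my-project/locations/us-central1/clusters/my-cluster/streams/my-stream"
//   type: STREAM_ANNOTATION_TYPE_ACTIVE_ZONE
//   active_zone {
//     normalized_vertices { x: 0.0 y: 0.0 }
//     normalized_vertices { x: 0.5 y: 0.0 }
//     normalized_vertices { x: 0.0 y: 0.5 }
//   }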
// Message of essential metadata of App Platform.
// This message is usually attached to a certain processor output annotation
// for the customer to identify the source of the data.
message AppPlatformMetadata {
  // The application resource name.
  string application = 1;

  // The instance resource id. Instance is the nested resource of the
  // application under the collection 'instances'.
  string instance_id = 2;

  // The node name of the application graph.
  string node = 3;

  // The referred processor resource name of the application node.
  string processor = 4;
}

// For any Cloud Function based customer processing logic, the customer's
// Cloud Function is expected to receive AppPlatformCloudFunctionRequest as
// the request and send back AppPlatformCloudFunctionResponse as the
// response.
// Message of the request from AppPlatform to the Cloud Function.
message AppPlatformCloudFunctionRequest {
  // A general annotation message that uses struct format to represent
  // different concrete annotation protobufs.
  message StructedInputAnnotation {
    // The ingestion time of the current annotation.
    int64 ingestion_time_micros = 1;

    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 2;
  }

  // The metadata of the AppPlatform for the customer to identify the source
  // of the payload.
  AppPlatformMetadata app_platform_metadata = 1;

  // The actual annotations to be processed by the customized Cloud Function.
  repeated StructedInputAnnotation annotations = 2;
}

// Message of the response from the customer's Cloud Function to AppPlatform.
message AppPlatformCloudFunctionResponse {
  // A general annotation message that uses struct format to represent
  // different concrete annotation protobufs.
  message StructedOutputAnnotation {
    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 1;
  }

  // The modified annotations that are returned to AppPlatform.
  // If the annotations fields are empty, those annotations will be dropped
  // by AppPlatform.
  repeated StructedOutputAnnotation annotations = 2;

  // If set to true, AppPlatform will use the original annotations instead of
  // dropping them, even if the annotations field is empty.
  bool annotation_passthrough = 3;

  // The event notifications that are returned to AppPlatform. Typically they
  // will then be configured to be consumed/forwarded to an operator that
  // handles events, such as the Pub/Sub operator.
  repeated AppPlatformEventBody events = 4;
}

// Message of the content of an AppPlatform event.
message AppPlatformEventBody {
  // Human readable string of the event, e.g. "There are more than 6 people
  // in the scene." or "Shelf is empty!".
  string event_message = 1;

  // For the case of Pub/Sub, it will be stored in the message attributes.
  // pubsub.proto
  google.protobuf.Struct payload = 2;

  // User defined Event Id, used to classify the event. Within a delivery
  // interval, events from the same application instance with the same id
  // will be de-duplicated and only the first one will be sent out. An empty
  // event_id will be treated as "".
  string event_id = 3;
}
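// Illustrative only (the message text, payload key, and event id below are
// assumptions): an AppPlatformEventBody in textproto form, as it might be
// returned inside an AppPlatformCloudFunctionResponse and forwarded to the
// Pub/Sub operator:
//
//   event_message: "There are more than 6 people in the scene."
//   payload { fields { key: "zone_id" value { string_value: "zone-1" } } }
//   event_id: "occupancy-alert"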