// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.visionai.v1;

import "google/protobuf/struct.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.VisionAI.V1";
option go_package = "cloud.google.com/go/visionai/apiv1/visionaipb;visionaipb";
option java_multiple_files = true;
option java_outer_classname = "AnnotationsProto";
option java_package = "com.google.cloud.visionai.v1";
option php_namespace = "Google\\Cloud\\VisionAI\\V1";
option ruby_package = "Google::Cloud::VisionAI::V1";

// Enum describing all possible types of a stream annotation.
enum StreamAnnotationType {
  // Type UNSPECIFIED.
  STREAM_ANNOTATION_TYPE_UNSPECIFIED = 0;

  // An active_zone annotation defines a polygon on top of the content from an
  // image/video based stream; subsequent processing will focus only on the
  // content inside the active zone.
  STREAM_ANNOTATION_TYPE_ACTIVE_ZONE = 1;

  // A crossing_line annotation defines a polyline on top of the content from
  // an image/video based Vision AI stream; events happening across the line
  // will be captured. For example, the counts of people who cross the line
  // in the Occupancy Analytics Processor.
  STREAM_ANNOTATION_TYPE_CROSSING_LINE = 2;
}
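
// For illustration only: the two concrete types above pair with the payload
// fields of the StreamAnnotation message defined later in this file. A
// hypothetical active-zone annotation in text format (all values made up):
//
//   id: "zone-1"
//   type: STREAM_ANNOTATION_TYPE_ACTIVE_ZONE
//   active_zone {
//     normalized_vertices { x: 0.0 y: 0.0 }
//     normalized_vertices { x: 0.5 y: 0.0 }
//     normalized_vertices { x: 0.5 y: 1.0 }
//     normalized_vertices { x: 0.0 y: 1.0 }
//   }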

// Output format for Personal Protective Equipment Detection Operator.
message PersonalProtectiveEquipmentDetectionOutput {
  // The entity info for annotations from person detection prediction result.
  message PersonEntity {
    // Entity id.
    int64 person_entity_id = 1;
  }

  // The entity info for annotations from PPE detection prediction result.
  message PPEEntity {
    // Label id.
    int64 ppe_label_id = 1;

    // Human readable string of the label (Examples: helmet, glove, mask).
    string ppe_label_string = 2;

    // Human readable string of the super category label (Examples: head_cover,
    // hands_cover, face_cover).
    string ppe_supercategory_label_string = 3;

    // Entity id.
    int64 ppe_entity_id = 4;
  }

  // Bounding Box in the normalized coordinates.
  message NormalizedBoundingBox {
    // Min in x coordinate.
    float xmin = 1;

    // Min in y coordinate.
    float ymin = 2;

    // Width of the bounding box.
    float width = 3;

    // Height of the bounding box.
    float height = 4;
  }
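
  // Illustrative note (not part of the API surface): the box is given as
  // (xmin, ymin, width, height) in normalized [0, 1] coordinates. Assuming a
  // hypothetical 1920x1080 frame, the box
  //   { xmin: 0.1 ymin: 0.2 width: 0.25 height: 0.5 }
  // spans pixels x in [192, 672) and y in [216, 756), since
  // 0.1 * 1920 = 192, (0.1 + 0.25) * 1920 = 672, 0.2 * 1080 = 216, and
  // (0.2 + 0.5) * 1080 = 756.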

  // PersonIdentified box contains the location and the entity info of the
  // person.
  message PersonIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Person entity info.
    PersonEntity person_entity = 4;
  }

  // PPEIdentified box contains the location and the entity info of the PPE.
  message PPEIdentifiedBox {
    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // PPE entity info.
    PPEEntity ppe_entity = 4;
  }

  // DetectedPerson contains the detected person, their associated PPE, and
  // the corresponding protection information.
  message DetectedPerson {
    // The id of the detected person.
    int64 person_id = 1;

    // The identified box of the detected person.
    PersonIdentifiedBox detected_person_identified_box = 2;

    // The identified boxes of the PPE associated with the detected person.
    repeated PPEIdentifiedBox detected_ppe_identified_boxes = 3;

    // Coverage scores for each body part follow.
    // Coverage score for face.
    optional float face_coverage_score = 4;

    // Coverage score for eyes.
    optional float eyes_coverage_score = 5;

    // Coverage score for head.
    optional float head_coverage_score = 6;

    // Coverage score for hands.
    optional float hands_coverage_score = 7;

    // Coverage score for body.
    optional float body_coverage_score = 8;

    // Coverage score for feet.
    optional float feet_coverage_score = 9;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of DetectedPersons.
  repeated DetectedPerson detected_persons = 2;
}
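
// For illustration only: a hedged, minimal text-format sketch of a
// PersonalProtectiveEquipmentDetectionOutput. All values are hypothetical.
//
//   current_time { seconds: 1700000000 }
//   detected_persons {
//     person_id: 1
//     detected_person_identified_box {
//       box_id: 10
//       normalized_bounding_box { xmin: 0.1 ymin: 0.2 width: 0.2 height: 0.6 }
//       confidence_score: 0.92
//       person_entity { person_entity_id: 1 }
//     }
//     detected_ppe_identified_boxes {
//       box_id: 11
//       normalized_bounding_box { xmin: 0.12 ymin: 0.2 width: 0.1 height: 0.1 }
//       confidence_score: 0.88
//       ppe_entity {
//         ppe_label_id: 2
//         ppe_label_string: "helmet"
//         ppe_supercategory_label_string: "head_cover"
//         ppe_entity_id: 7
//       }
//     }
//     head_coverage_score: 0.95
//   }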

// Prediction output format for Generic Object Detection.
message ObjectDetectionPredictionResult {
  // The entity info for annotations from object detection prediction result.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // Identified box contains the location and the entity of the object.
  message IdentifiedBox {
    // Bounding Box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float confidence_score = 3;

    // Entity of this box.
    Entity entity = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;
}
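
// For illustration only: a hypothetical ObjectDetectionPredictionResult in
// text format (values are made up).
//
//   current_time { seconds: 1700000000 }
//   identified_boxes {
//     box_id: 1
//     normalized_bounding_box { xmin: 0.3 ymin: 0.4 width: 0.1 height: 0.2 }
//     confidence_score: 0.87
//     entity { label_id: 5 label_string: "car" }
//   }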

// Prediction output format for Image Object Detection.
message ImageObjectDetectionPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified, ordered
  // by confidence score in descending order. These are the id segments
  // instead of the full resource names.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified; order
  // matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in the correctness of the predicted IDs; a higher
  // value means higher confidence. Order matches the IDs.
  repeated float confidences = 3;

  // Bounding boxes, i.e. the rectangles over the image, that pinpoint
  // the found AnnotationSpecs. Given in the order that matches the IDs. Each
  // bounding box is an array of 4 numbers `xMin`, `xMax`, `yMin`, and
  // `yMax`, which represent the extremal coordinates of the box. They are
  // relative to the image size, and the point 0,0 is in the top left
  // of the image.
  repeated google.protobuf.ListValue bboxes = 4;
}
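
// For illustration only: the four repeated fields above are parallel arrays,
// and each bboxes entry is a 4-number list in the order
// [xMin, xMax, yMin, yMax]. A hypothetical result for two detections:
//
//   ids: 3 ids: 8
//   display_names: "dog" display_names: "cat"
//   confidences: 0.97 confidences: 0.82
//   bboxes { values { number_value: 0.1 } values { number_value: 0.4 }
//            values { number_value: 0.2 } values { number_value: 0.7 } }
//   bboxes { values { number_value: 0.5 } values { number_value: 0.9 }
//            values { number_value: 0.1 } values { number_value: 0.6 } }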

// Prediction output format for Image and Text Classification.
message ClassificationPredictionResult {
  // The resource IDs of the AnnotationSpecs that had been identified.
  repeated int64 ids = 1;

  // The display names of the AnnotationSpecs that had been identified; order
  // matches the IDs.
  repeated string display_names = 2;

  // The Model's confidences in the correctness of the predicted IDs; a higher
  // value means higher confidence. Order matches the IDs.
  repeated float confidences = 3;
}
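
// For illustration only, a hypothetical two-label classification result:
//
//   ids: 1 ids: 4
//   display_names: "daisy" display_names: "rose"
//   confidences: 0.91 confidences: 0.07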

// Prediction output format for Image Segmentation.
message ImageSegmentationPredictionResult {
  // A PNG image where each pixel in the mask represents the category to which
  // the pixel in the original image was predicted to belong. The size of this
  // image will be the same as the original image. The mapping between the
  // AnnotationSpec and the color can be found in the model's metadata. The
  // model will choose the most likely category, and if none of the categories
  // reaches the confidence threshold, the pixel will be marked as background.
  string category_mask = 1;

  // A one-channel image which is encoded as an 8-bit lossless PNG. The size
  // of the image will be the same as the original image. For a specific
  // pixel, a darker color means less confidence in the correctness of the
  // category in the categoryMask for the corresponding pixel. Black means no
  // confidence and white means complete confidence.
  string confidence_mask = 2;
}
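
// Illustrative note (an assumption, not stated by this proto): a client would
// typically decode both PNGs (possibly after base64-decoding the strings),
// look up each category_mask pixel's color against the category-color mapping
// in the model's metadata, and read the co-located confidence_mask pixel as a
// per-pixel reliability. For example, an 8-bit confidence value of 128
// corresponds to roughly 128 / 255 ~= 50% confidence.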

// Prediction output format for Video Action Recognition.
message VideoActionRecognitionPredictionResult {
  // Each IdentifiedAction is one particular identification of an action
  // specified with the AnnotationSpec id, display_name and the associated
  // confidence score.
  message IdentifiedAction {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // actions have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the actions have
  // been identified. In particular, if the end is the same as the start, it
  // means the identification happens on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the actions identified in the time range.
  repeated IdentifiedAction actions = 3;
}
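
// For illustration only: a hypothetical frame-level result (segment start and
// end identical, so the identification applies to a single frame):
//
//   segment_start_time { seconds: 100 }
//   segment_end_time { seconds: 100 }
//   actions { id: "12" display_name: "running" confidence: 0.76 }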

// Prediction output format for Video Object Tracking.
message VideoObjectTrackingPredictionResult {
  // Bounding box for the detected object, i.e. the rectangle over the video
  // frame pinpointing the found AnnotationSpec. The coordinates are relative
  // to the frame size, and the point 0,0 is in the top left of the frame.
  message BoundingBox {
    // The leftmost coordinate of the bounding box.
    float x_min = 1;

    // The rightmost coordinate of the bounding box.
    float x_max = 2;

    // The topmost coordinate of the bounding box.
    float y_min = 3;

    // The bottommost coordinate of the bounding box.
    float y_max = 4;
  }

  // Each DetectedObject is one particular identification of an object
  // specified with the AnnotationSpec id and display_name, the bounding box,
  // the associated confidence score and the corresponding track_id.
  message DetectedObject {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // Bounding box.
    BoundingBox bounding_box = 3;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 4;

    // The same object may be identified on multiple frames which are
    // typically adjacent. The set of frames where a particular object has
    // been detected forms a track. This track_id can be used to trace down
    // all frames for a detected object.
    int64 track_id = 5;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // current identifications happen.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the current
  // identifications happen. In particular, if the end is the same as the
  // start, it means the identifications happen on a specific video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the objects detected in the specified time range.
  repeated DetectedObject objects = 3;
}
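
// For illustration only: two hypothetical detections from consecutive
// segments; the shared track_id marks them as the same physical object.
//
//   # Segment 1
//   objects { id: "7" display_name: "car"
//             bounding_box { x_min: 0.1 x_max: 0.3 y_min: 0.5 y_max: 0.8 }
//             confidence: 0.9 track_id: 42 }
//   # Segment 2 (same object, slightly moved)
//   objects { id: "7" display_name: "car"
//             bounding_box { x_min: 0.12 x_max: 0.32 y_min: 0.5 y_max: 0.8 }
//             confidence: 0.88 track_id: 42 }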

// Prediction output format for Video Classification.
message VideoClassificationPredictionResult {
  // Each IdentifiedClassification is one particular identification of a
  // classification specified with the AnnotationSpec id and display_name,
  // and the associated confidence score.
  message IdentifiedClassification {
    // The resource ID of the AnnotationSpec that had been identified.
    string id = 1;

    // The display name of the AnnotationSpec that had been identified.
    string display_name = 2;

    // The Model's confidence in the correctness of this identification; a
    // higher value means higher confidence.
    float confidence = 3;
  }

  // The beginning, inclusive, of the video's time segment in which the
  // classifications have been identified.
  google.protobuf.Timestamp segment_start_time = 1;

  // The end, inclusive, of the video's time segment in which the
  // classifications have been identified. In particular, if the end is the
  // same as the start, it means the identification happens on a specific
  // video frame.
  google.protobuf.Timestamp segment_end_time = 2;

  // All of the classifications identified in the time range.
  repeated IdentifiedClassification classifications = 3;
}
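
// For illustration only, a hypothetical segment-level classification:
//
//   segment_start_time { seconds: 0 }
//   segment_end_time { seconds: 10 }
//   classifications { id: "2" display_name: "cooking" confidence: 0.83 }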

// The prediction result proto for occupancy counting.
message OccupancyCountingPredictionResult {
  // The entity info for annotations from the occupancy counting operator.
  message Entity {
    // Label id.
    int64 label_id = 1;

    // Human readable string of the label.
    string label_string = 2;
  }

  // Identified box contains the location and the entity of the object.
  message IdentifiedBox {
    // Bounding Box in the normalized coordinates.
    message NormalizedBoundingBox {
      // Min in x coordinate.
      float xmin = 1;

      // Min in y coordinate.
      float ymin = 2;

      // Width of the bounding box.
      float width = 3;

      // Height of the bounding box.
      float height = 4;
    }

    // A unique id for this box.
    int64 box_id = 1;

    // Bounding Box in the normalized coordinates.
    NormalizedBoundingBox normalized_bounding_box = 2;

    // Confidence score associated with this box.
    float score = 3;

    // Entity of this box.
    Entity entity = 4;

    // A unique id to identify a track. It should be consistent across frames.
    // It only exists if tracking is enabled.
    int64 track_id = 5;
  }

  // The statistics info for annotations from the occupancy counting operator.
  message Stats {
    // The object info and instant count for annotations from the occupancy
    // counting operator.
    message ObjectCount {
      // Entity of this object.
      Entity entity = 1;

      // Count of the object.
      int32 count = 2;
    }

    // The object info and accumulated count for annotations from the
    // occupancy counting operator.
    message AccumulatedObjectCount {
      // The start time of the accumulated count.
      google.protobuf.Timestamp start_time = 1;

      // The object count for the accumulated count.
      ObjectCount object_count = 2;
    }

    // Message for the crossing line count.
    message CrossingLineCount {
      // Line annotation from the user.
      StreamAnnotation annotation = 1;

      // The counts in the direction that follows the right-hand rule.
      repeated ObjectCount positive_direction_counts = 2;

      // The counts in the direction opposite to the right-hand rule.
      repeated ObjectCount negative_direction_counts = 3;

      // The accumulated positive counts.
      repeated AccumulatedObjectCount accumulated_positive_direction_counts = 4;

      // The accumulated negative counts.
      repeated AccumulatedObjectCount accumulated_negative_direction_counts = 5;
    }

    // Message for the active zone count.
    message ActiveZoneCount {
      // Active zone annotation from the user.
      StreamAnnotation annotation = 1;

      // Counts in the zone.
      repeated ObjectCount counts = 2;
    }

    // Counts of the full frame.
    repeated ObjectCount full_frame_count = 1;

    // Crossing line counts.
    repeated CrossingLineCount crossing_line_counts = 2;

    // Active zone counts.
    repeated ActiveZoneCount active_zone_counts = 3;
  }

  // The track info for annotations from the occupancy counting operator.
  message TrackInfo {
    // A unique id to identify a track. It should be consistent across frames.
    string track_id = 1;

    // Start timestamp of this track.
    google.protobuf.Timestamp start_time = 2;
  }

  // The dwell time info for annotations from the occupancy counting operator.
  message DwellTimeInfo {
    // A unique id to identify a track. It should be consistent across frames.
    string track_id = 1;

    // The unique id for the zone in which the object is dwelling/waiting.
    string zone_id = 2;

    // The beginning time when a dwelling object has been identified in a zone.
    google.protobuf.Timestamp dwell_start_time = 3;

    // The end time when a dwelling object has exited a zone.
    google.protobuf.Timestamp dwell_end_time = 4;
  }

  // Current timestamp.
  google.protobuf.Timestamp current_time = 1;

  // A list of identified boxes.
  repeated IdentifiedBox identified_boxes = 2;

  // Detection statistics.
  Stats stats = 3;

  // Track related information. All the tracks that are live at this timestamp.
  // It only exists if tracking is enabled.
  repeated TrackInfo track_info = 4;

  // Dwell time related information. All the tracks that are live in a given
  // zone with start and end dwell time timestamps.
  repeated DwellTimeInfo dwell_time_info = 5;

  // The presentation timestamp of the frame.
  optional int64 pts = 6;
}
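
// For illustration only: a hedged, minimal OccupancyCountingPredictionResult
// in text format. All values are hypothetical; crossing-line and active-zone
// annotations reference the StreamAnnotation message defined below.
//
//   current_time { seconds: 1700000000 }
//   identified_boxes {
//     box_id: 1
//     normalized_bounding_box { xmin: 0.4 ymin: 0.5 width: 0.1 height: 0.3 }
//     score: 0.9
//     entity { label_id: 1 label_string: "person" }
//     track_id: 99
//   }
//   stats {
//     full_frame_count { entity { label_id: 1 label_string: "person" } count: 1 }
//   }
//   track_info { track_id: "99" start_time { seconds: 1699999990 } }
//   pts: 123450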

// Message describing annotations for a Vision AI stream resource.
message StreamAnnotation {
  oneof annotation_payload {
    // Annotation for type ACTIVE_ZONE.
    NormalizedPolygon active_zone = 5;

    // Annotation for type CROSSING_LINE.
    NormalizedPolyline crossing_line = 6;
  }

  // ID of the annotation. It must be unique within a given context; for
  // example, among all the annotations on one input stream of a Vision AI
  // application.
  string id = 1;

  // User-friendly name for the annotation.
  string display_name = 2;

  // The Vision AI stream resource name.
  string source_stream = 3;

  // The actual type of Annotation.
  StreamAnnotationType type = 4;
}
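
// For illustration only: a hypothetical crossing-line StreamAnnotation that
// draws a horizontal line across the middle of the frame (the resource name
// shape below is illustrative, not authoritative):
//
//   id: "line-1"
//   display_name: "entrance line"
//   source_stream: "projects/p/locations/l/clusters/c/streams/s"
//   type: STREAM_ANNOTATION_TYPE_CROSSING_LINE
//   crossing_line {
//     normalized_vertices { x: 0.0 y: 0.5 }
//     normalized_vertices { x: 1.0 y: 0.5 }
//   }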

// A wrapper of repeated StreamAnnotation.
message StreamAnnotations {
  // Multiple annotations.
  repeated StreamAnnotation stream_annotations = 1;
}

// Normalized Polygon.
message NormalizedPolygon {
  // The bounding polygon's normalized vertices. The top left corner of the
  // image will be [0, 0].
  repeated NormalizedVertex normalized_vertices = 1;
}

// Normalized Polyline, which represents a curve consisting of connected
// straight-line segments.
message NormalizedPolyline {
  // A sequence of vertices connected by straight lines.
  repeated NormalizedVertex normalized_vertices = 1;
}

// A vertex represents a 2D point in the image.
// NOTE: the normalized vertex coordinates are relative to the original image
// and range from 0 to 1.
message NormalizedVertex {
  // X coordinate.
  float x = 1;

  // Y coordinate.
  float y = 2;
}
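
// Worked example (illustrative): since coordinates are normalized to [0, 1],
// the vertex { x: 0.25 y: 0.5 } on a hypothetical 1920x1080 image corresponds
// to pixel (0.25 * 1920, 0.5 * 1080) = (480, 540).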

// Message of essential metadata of App Platform.
// This message is usually attached to a certain processor output annotation
// for the customer to identify the source of the data.
message AppPlatformMetadata {
  // The application resource name.
  string application = 1;

  // The instance resource id. Instance is the nested resource of the
  // application under the collection 'instances'.
  string instance_id = 2;

  // The node name of the application graph.
  string node = 3;

  // The referred processor resource name of the application node.
  string processor = 4;
}

// For any Cloud Function based customer processing logic, the customer's
// Cloud Function is expected to receive AppPlatformCloudFunctionRequest as
// the request and send back AppPlatformCloudFunctionResponse as the response.
// Message of the request from AppPlatform to the Cloud Function.
message AppPlatformCloudFunctionRequest {
  // A general annotation message that uses struct format to represent
  // different concrete annotation protobufs.
  message StructedInputAnnotation {
    // The ingestion time of the current annotation.
    int64 ingestion_time_micros = 1;

    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 2;
  }

  // The metadata of the AppPlatform for the customer to identify the source
  // of the payload.
  AppPlatformMetadata app_platform_metadata = 1;

  // The actual annotations to be processed by the customized Cloud Function.
  repeated StructedInputAnnotation annotations = 2;
}
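
// For illustration only: a hypothetical request as the Cloud Function might
// receive it (shown in text format; the actual wire encoding is presumably
// JSON, and all names below are made up):
//
//   app_platform_metadata {
//     application: "projects/p/locations/l/applications/app"
//     instance_id: "instance-1"
//     node: "occupancy-counting-node"
//     processor: "projects/p/locations/l/processors/occupancy-counting"
//   }
//   annotations {
//     ingestion_time_micros: 1700000000000000
//     annotation { ... }  # struct form of a concrete annotation proto
//   }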

// Message of the response from the customer's Cloud Function to AppPlatform.
message AppPlatformCloudFunctionResponse {
  // A general annotation message that uses struct format to represent
  // different concrete annotation protobufs.
  message StructedOutputAnnotation {
    // The struct format of the actual annotation.
    google.protobuf.Struct annotation = 1;
  }

  // The modified annotations that are returned to AppPlatform.
  // If the annotations field is empty, those annotations will be dropped by
  // AppPlatform.
  repeated StructedOutputAnnotation annotations = 2;

  // If set to true, AppPlatform will use the original annotations instead of
  // dropping them, even if the annotations field is empty.
  bool annotation_passthrough = 3;

  // The event notifications that are returned to AppPlatform. Typically they
  // will then be configured to be consumed/forwarded to an operator that
  // handles events, such as the Pub/Sub operator.
  repeated AppPlatformEventBody events = 4;
}
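
// For illustration only: a hypothetical response that keeps one (possibly
// modified) annotation and raises one event:
//
//   annotations {
//     annotation { ... }  # struct form of the modified annotation
//   }
//   annotation_passthrough: false
//   events {
//     event_message: "Shelf is empty!"
//     event_id: "shelf-empty"
//   }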

// Message of the content of an AppPlatform event.
message AppPlatformEventBody {
  // Human readable string of the event, like "There are more than 6 people in
  // the scene" or "Shelf is empty!".
  string event_message = 1;

  // For the case of Pub/Sub, it will be stored in the message attributes.
  google.protobuf.Struct payload = 2;

  // User defined Event Id, used to classify events. Within a delivery
  // interval, events from the same application instance with the same id will
  // be de-duplicated and only the first one will be sent out. An empty
  // event_id will be treated as "".
  string event_id = 3;
}
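
// Illustrative note on de-duplication (hypothetical values): if two events
// with event_id "shelf-empty" are produced by the same application instance
// within one delivery interval, only the first is sent out; an event with a
// different event_id (e.g. "zone-alert") is delivered independently.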
663