// Copyright 2019 Google LLC.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

syntax = "proto3";

package google.cloud.videointelligence.v1beta2;

import "google/api/annotations.proto";
import "google/api/client.proto";
import "google/api/field_behavior.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/duration.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";

option csharp_namespace = "Google.Cloud.VideoIntelligence.V1Beta2";
option go_package = "cloud.google.com/go/videointelligence/apiv1beta2/videointelligencepb;videointelligencepb";
option java_multiple_files = true;
option java_outer_classname = "VideoIntelligenceServiceProto";
option java_package = "com.google.cloud.videointelligence.v1beta2";
option php_namespace = "Google\\Cloud\\VideoIntelligence\\V1beta2";
option ruby_package = "Google::Cloud::VideoIntelligence::V1beta2";

// Service that implements the Google Cloud Video Intelligence API.
service VideoIntelligenceService {
  option (google.api.default_host) = "videointelligence.googleapis.com";
  option (google.api.oauth_scopes) =
      "https://www.googleapis.com/auth/cloud-platform";

  // Performs asynchronous video annotation. Progress and results can be
  // retrieved through the `google.longrunning.Operations` interface.
  // `Operation.metadata` contains `AnnotateVideoProgress` (progress).
  // `Operation.response` contains `AnnotateVideoResponse` (results).
  rpc AnnotateVideo(AnnotateVideoRequest)
      returns (google.longrunning.Operation) {
    option (google.api.http) = {
      post: "/v1beta2/videos:annotate"
      body: "*"
    };
    option (google.api.method_signature) = "input_uri,features";
    option (google.longrunning.operation_info) = {
      response_type: "AnnotateVideoResponse"
      metadata_type: "AnnotateVideoProgress"
    };
  }
}
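
// For illustration only: a minimal `AnnotateVideoRequest` asking for label
// and shot change detection might look like the following textproto (the
// bucket and object names are hypothetical):
//
//   input_uri: "gs://example-bucket/example-video.mp4"
//   features: LABEL_DETECTION
//   features: SHOT_CHANGE_DETECTION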

// Video annotation request.
message AnnotateVideoRequest {
  // Input video location. Currently, only
  // [Google Cloud Storage](https://cloud.google.com/storage/) URIs are
  // supported, which must be specified in the following format:
  // `gs://bucket-id/object-id` (other URI formats return
  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
  // more information, see [Request
  // URIs](https://cloud.google.com/storage/docs/request-endpoints). A video
  // URI may include wildcards in `object-id`, and thus identify multiple
  // videos. Supported wildcards: '*' to match 0 or more characters;
  // '?' to match 1 character. If unset, the input video should be embedded
  // in the request as `input_content`. If set, `input_content` should be
  // unset.
  string input_uri = 1;

  // The video data bytes.
  // If unset, the input video(s) should be specified via `input_uri`.
  // If set, `input_uri` should be unset.
  bytes input_content = 6;

  // Required. Requested video annotation features.
  repeated Feature features = 2 [(google.api.field_behavior) = REQUIRED];

  // Additional video context and/or feature-specific parameters.
  VideoContext video_context = 3;

  // Optional. Location where the output (in JSON format) should be stored.
  // Currently, only [Google Cloud Storage](https://cloud.google.com/storage/)
  // URIs are supported, which must be specified in the following format:
  // `gs://bucket-id/object-id` (other URI formats return
  // [google.rpc.Code.INVALID_ARGUMENT][google.rpc.Code.INVALID_ARGUMENT]). For
  // more information, see [Request
  // URIs](https://cloud.google.com/storage/docs/request-endpoints).
  string output_uri = 4 [(google.api.field_behavior) = OPTIONAL];

  // Optional. Cloud region where annotation should take place. Supported cloud
  // regions: `us-east1`, `us-west1`, `europe-west1`, `asia-east1`. If no
  // region is specified, a region will be determined based on the video file
  // location.
  string location_id = 5 [(google.api.field_behavior) = OPTIONAL];
}

// Video context and/or feature-specific parameters.
message VideoContext {
  // Video segments to annotate. The segments may overlap and are not required
  // to be contiguous or span the whole video. If unspecified, each video is
  // treated as a single segment.
  repeated VideoSegment segments = 1;

  // Config for LABEL_DETECTION.
  LabelDetectionConfig label_detection_config = 2;

  // Config for SHOT_CHANGE_DETECTION.
  ShotChangeDetectionConfig shot_change_detection_config = 3;

  // Config for EXPLICIT_CONTENT_DETECTION.
  ExplicitContentDetectionConfig explicit_content_detection_config = 4;

  // Config for FACE_DETECTION.
  FaceDetectionConfig face_detection_config = 5;
}

// Config for LABEL_DETECTION.
message LabelDetectionConfig {
  // What labels should be detected with LABEL_DETECTION, in addition to
  // video-level labels or segment-level labels.
  // If unspecified, defaults to `SHOT_MODE`.
  LabelDetectionMode label_detection_mode = 1;

  // Whether the video has been shot from a stationary (i.e., non-moving)
  // camera. When set to true, might improve detection accuracy for moving
  // objects. Should be used with `SHOT_AND_FRAME_MODE` enabled.
  bool stationary_camera = 2;

  // Model to use for label detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 3;
}

// Config for SHOT_CHANGE_DETECTION.
message ShotChangeDetectionConfig {
  // Model to use for shot change detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 1;
}

// Config for EXPLICIT_CONTENT_DETECTION.
message ExplicitContentDetectionConfig {
  // Model to use for explicit content detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 1;
}

// Config for FACE_DETECTION.
message FaceDetectionConfig {
  // Model to use for face detection.
  // Supported values: "builtin/stable" (the default if unset) and
  // "builtin/latest".
  string model = 1;

  // Whether bounding boxes should be included in the face annotation output.
  bool include_bounding_boxes = 2;
}
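
// For illustration only: a `VideoContext` restricting annotation to the first
// 30 seconds of a video shot from a fixed camera, with both shot-level and
// frame-level labels, might be written as the following textproto (all values
// are hypothetical):
//
//   segments {
//     start_time_offset { seconds: 0 }
//     end_time_offset { seconds: 30 }
//   }
//   label_detection_config {
//     label_detection_mode: SHOT_AND_FRAME_MODE
//     stationary_camera: true
//   }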

// Video segment.
message VideoSegment {
  // Time-offset, relative to the beginning of the video,
  // corresponding to the start of the segment (inclusive).
  google.protobuf.Duration start_time_offset = 1;

  // Time-offset, relative to the beginning of the video,
  // corresponding to the end of the segment (inclusive).
  google.protobuf.Duration end_time_offset = 2;
}

// Video segment level annotation results for label detection.
message LabelSegment {
  // Video segment where a label was detected.
  VideoSegment segment = 1;

  // Confidence that the label is accurate. Range: [0, 1].
  float confidence = 2;
}

// Video frame level annotation results for label detection.
message LabelFrame {
  // Time-offset, relative to the beginning of the video, corresponding to the
  // video frame for this location.
  google.protobuf.Duration time_offset = 1;

  // Confidence that the label is accurate. Range: [0, 1].
  float confidence = 2;
}

// Detected entity from video analysis.
message Entity {
  // Opaque entity ID. Some IDs may be available in the
  // [Google Knowledge Graph Search
  // API](https://developers.google.com/knowledge-graph/).
  string entity_id = 1;

  // Textual description, e.g. `Fixed-gear bicycle`.
  string description = 2;

  // Language code for `description` in BCP-47 format.
  string language_code = 3;
}

// Label annotation.
message LabelAnnotation {
  // Detected entity.
  Entity entity = 1;

  // Common categories for the detected entity. For example, when the label is
  // `Terrier`, the category is likely `dog`. In some cases there might be
  // more than one category, e.g. `Terrier` could also be a `pet`.
  repeated Entity category_entities = 2;

  // All video segments where a label was detected.
  repeated LabelSegment segments = 3;

  // All video frames where a label was detected.
  repeated LabelFrame frames = 4;
}

// Video frame level annotation results for explicit content.
message ExplicitContentFrame {
  // Time-offset, relative to the beginning of the video, corresponding to the
  // video frame for this location.
  google.protobuf.Duration time_offset = 1;

  // Likelihood of pornographic content.
  Likelihood pornography_likelihood = 2;
}

// Explicit content annotation (based on per-frame visual signals only).
// If no explicit content has been detected in a frame, no annotations are
// present for that frame.
message ExplicitContentAnnotation {
  // All video frames where explicit content was detected.
  repeated ExplicitContentFrame frames = 1;
}

// Normalized bounding box.
// The normalized vertex coordinates are relative to the original image.
// Range: [0, 1].
message NormalizedBoundingBox {
  // Left X coordinate.
  float left = 1;

  // Top Y coordinate.
  float top = 2;

  // Right X coordinate.
  float right = 3;

  // Bottom Y coordinate.
  float bottom = 4;
}
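
// For illustration only: because the coordinates are normalized to [0, 1],
// a box covering exactly the top-left quadrant of a frame would read:
//
//   left: 0.0
//   top: 0.0
//   right: 0.5
//   bottom: 0.5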

// Video segment level annotation results for face detection.
message FaceSegment {
  // Video segment where a face was detected.
  VideoSegment segment = 1;
}

// Video frame level annotation results for face detection.
message FaceFrame {
  // Normalized bounding boxes in a frame.
  // There can be more than one box if the same face is detected in multiple
  // locations within the current frame.
  repeated NormalizedBoundingBox normalized_bounding_boxes = 1;

  // Time-offset, relative to the beginning of the video,
  // corresponding to the video frame for this location.
  google.protobuf.Duration time_offset = 2;
}

// Face annotation.
message FaceAnnotation {
  // Thumbnail of a representative face view (in JPEG format).
  bytes thumbnail = 1;

  // All video segments where a face was detected.
  repeated FaceSegment segments = 2;

  // All video frames where a face was detected.
  repeated FaceFrame frames = 3;
}

// Annotation results for a single video.
message VideoAnnotationResults {
  // Video file location in
  // [Google Cloud Storage](https://cloud.google.com/storage/).
  string input_uri = 1;

  // Label annotations on video level or user-specified segment level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation segment_label_annotations = 2;

  // Label annotations on shot level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation shot_label_annotations = 3;

  // Label annotations on frame level.
  // There is exactly one element for each unique label.
  repeated LabelAnnotation frame_label_annotations = 4;

  // Face annotations. There is exactly one element for each unique face.
  repeated FaceAnnotation face_annotations = 5;

  // Shot annotations. Each shot is represented as a video segment.
  repeated VideoSegment shot_annotations = 6;

  // Explicit content annotation.
  ExplicitContentAnnotation explicit_annotation = 7;

  // If set, indicates an error. Note that for a single `AnnotateVideoRequest`
  // some videos may succeed and some may fail.
  google.rpc.Status error = 9;
}

// Video annotation response. Included in the `response`
// field of the `Operation` returned by the `GetOperation`
// call of the `google::longrunning::Operations` service.
message AnnotateVideoResponse {
  // Annotation results for all videos specified in `AnnotateVideoRequest`.
  repeated VideoAnnotationResults annotation_results = 1;
}

// Annotation progress for a single video.
message VideoAnnotationProgress {
  // Video file location in
  // [Google Cloud Storage](https://cloud.google.com/storage/).
  string input_uri = 1;

  // Approximate percentage processed thus far.
  // Guaranteed to be 100 when fully processed.
  int32 progress_percent = 2;

  // Time when the request was received.
  google.protobuf.Timestamp start_time = 3;

  // Time of the most recent update.
  google.protobuf.Timestamp update_time = 4;
}

// Video annotation progress. Included in the `metadata`
// field of the `Operation` returned by the `GetOperation`
// call of the `google::longrunning::Operations` service.
message AnnotateVideoProgress {
  // Progress metadata for all videos specified in `AnnotateVideoRequest`.
  repeated VideoAnnotationProgress annotation_progress = 1;
}
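
// For illustration only: while the operation is running, its `metadata` field
// might decode to an `AnnotateVideoProgress` such as the following textproto
// (values are hypothetical):
//
//   annotation_progress {
//     input_uri: "gs://example-bucket/example-video.mp4"
//     progress_percent: 45
//   }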

// Video annotation feature.
enum Feature {
  // Unspecified.
  FEATURE_UNSPECIFIED = 0;

  // Label detection. Detect objects, such as dog or flower.
  LABEL_DETECTION = 1;

  // Shot change detection.
  SHOT_CHANGE_DETECTION = 2;

  // Explicit content detection.
  EXPLICIT_CONTENT_DETECTION = 3;

  // Human face detection and tracking.
  FACE_DETECTION = 4;
}

// Label detection mode.
enum LabelDetectionMode {
  // Unspecified.
  LABEL_DETECTION_MODE_UNSPECIFIED = 0;

  // Detect shot-level labels.
  SHOT_MODE = 1;

  // Detect frame-level labels.
  FRAME_MODE = 2;

  // Detect both shot-level and frame-level labels.
  SHOT_AND_FRAME_MODE = 3;
}

// Bucketized representation of likelihood.
enum Likelihood {
  // Unspecified likelihood.
  LIKELIHOOD_UNSPECIFIED = 0;

  // Very unlikely.
  VERY_UNLIKELY = 1;

  // Unlikely.
  UNLIKELY = 2;

  // Possible.
  POSSIBLE = 3;

  // Likely.
  LIKELY = 4;

  // Very likely.
  VERY_LIKELY = 5;
}
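
// For illustration only: an `ExplicitContentFrame` at the 12.5-second mark
// judged very unlikely to contain pornographic content would read as the
// following textproto (values are hypothetical):
//
//   time_offset {
//     seconds: 12
//     nanos: 500000000
//   }
//   pornography_likelihood: VERY_UNLIKELY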