/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "call/rtp_payload_params.h"

#include <stddef.h>

#include <algorithm>

#include "absl/container/inlined_vector.h"
#include "absl/strings/match.h"
#include "absl/types/variant.h"
#include "api/video/video_timing.h"
#include "modules/video_coding/codecs/h264/include/h264_globals.h"
#include "modules/video_coding/codecs/interface/common_constants.h"
#include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
#include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
#include "modules/video_coding/frame_dependencies_calculator.h"
#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "rtc_base/random.h"
#include "rtc_base/time_utils.h"

namespace webrtc {
namespace {

constexpr int kMaxSimulatedSpatialLayers = 3;

void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
                                   absl::optional<int> spatial_index,
                                   RTPVideoHeader* rtp) {
  rtp->codec = info.codecType;
  rtp->is_last_frame_in_picture = info.end_of_picture;
  switch (info.codecType) {
    case kVideoCodecVP8: {
      auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
      vp8_header.InitRTPVideoHeaderVP8();
      vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
      vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
      vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
      vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
      rtp->simulcastIdx = spatial_index.value_or(0);
      return;
    }
    case kVideoCodecVP9: {
      auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
      vp9_header.InitRTPVideoHeaderVP9();
      vp9_header.inter_pic_predicted =
          info.codecSpecific.VP9.inter_pic_predicted;
      vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
      vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
      vp9_header.non_ref_for_inter_layer_pred =
          info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
      vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
      vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
      vp9_header.inter_layer_predicted =
          info.codecSpecific.VP9.inter_layer_predicted;
      vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
      vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
      vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer;
      if (vp9_header.num_spatial_layers > 1) {
        vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
      } else {
        vp9_header.spatial_idx = kNoSpatialIdx;
      }
      if (info.codecSpecific.VP9.ss_data_available) {
        vp9_header.spatial_layer_resolution_present =
            info.codecSpecific.VP9.spatial_layer_resolution_present;
        if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
          for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
               ++i) {
            vp9_header.width[i] = info.codecSpecific.VP9.width[i];
            vp9_header.height[i] = info.codecSpecific.VP9.height[i];
          }
        }
        vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
      }

      vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
      for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
        vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
      }
      vp9_header.end_of_picture = info.end_of_picture;
      return;
    }
    case kVideoCodecH264: {
      auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
      h264_header.packetization_mode =
          info.codecSpecific.H264.packetization_mode;
      rtp->simulcastIdx = spatial_index.value_or(0);
      return;
    }
    case kVideoCodecMultiplex:
    case kVideoCodecGeneric:
      rtp->codec = kVideoCodecGeneric;
      rtp->simulcastIdx = spatial_index.value_or(0);
      return;
    default:
      return;
  }
}

void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
  if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
      image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) {
    timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
    return;
  }

  timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
      image.capture_time_ms_, image.timing_.encode_start_ms);
  timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
      image.capture_time_ms_, image.timing_.encode_finish_ms);
  timing->packetization_finish_delta_ms = 0;
  timing->pacer_exit_delta_ms = 0;
  timing->network_timestamp_delta_ms = 0;
  timing->network2_timestamp_delta_ms = 0;
  timing->flags = image.timing_.flags;
}

// Returns a structure that aligns with the simulated generic info. The
// templates allow producing a valid dependency descriptor for any stream where
// `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
// https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
// template_fdiffs()). The set of templates is not tuned for any particular
// structure, so the dependency descriptor uses more bytes on the wire than it
// would with tuned templates.
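//
// For illustration only (derived from the implementation below, not a
// normative contract): MinimalisticStructure(/*num_spatial_layers=*/1,
// /*num_temporal_layers=*/2) produces 2 decode targets, 1 chain and two
// templates:
//   {spatial_id 0, temporal_id 0}: dtis {kSwitch, kSwitch},     fdiffs {2}
//   {spatial_id 0, temporal_id 1}: dtis {kNotPresent, kSwitch}, fdiffs {1}
// with chain_diffs {1} for each template and
// decode_target_protected_by_chain {0, 0}.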
FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
                                               int num_temporal_layers) {
  RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
  RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
  RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
  FrameDependencyStructure structure;
  structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
  structure.num_chains = num_spatial_layers;
  structure.templates.reserve(num_spatial_layers * num_temporal_layers);
  for (int sid = 0; sid < num_spatial_layers; ++sid) {
    for (int tid = 0; tid < num_temporal_layers; ++tid) {
      FrameDependencyTemplate a_template;
      a_template.spatial_id = sid;
      a_template.temporal_id = tid;
      for (int s = 0; s < num_spatial_layers; ++s) {
        for (int t = 0; t < num_temporal_layers; ++t) {
          // Prefer the kSwitch indication for frames that are part of the
          // decode target, because the dependency descriptor information
          // generated in this class uses kSwitch indications more often than
          // kRequired, increasing the chance of a good (or complete) template
          // match.
          a_template.decode_target_indications.push_back(
              sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
                                   : DecodeTargetIndication::kNotPresent);
        }
      }
      a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
                                                      num_temporal_layers
                                                : num_spatial_layers);
      a_template.chain_diffs.assign(structure.num_chains, 1);
      structure.templates.push_back(a_template);

      structure.decode_target_protected_by_chain.push_back(sid);
    }
  }
  return structure;
}
}  // namespace

RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
                                   const RtpPayloadState* state,
                                   const FieldTrialsView& trials)
    : ssrc_(ssrc),
      generic_picture_id_experiment_(
          absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
                           "Enabled")),
      simulate_generic_structure_(absl::StartsWith(
          trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
          "Enabled")) {
  for (auto& spatial_layer : last_shared_frame_id_)
    spatial_layer.fill(-1);

  chain_last_frame_id_.fill(-1);
  buffer_id_to_frame_id_.fill(-1);

  Random random(rtc::TimeMicros());
  state_.picture_id =
      state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
  state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
}

RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;

RtpPayloadParams::~RtpPayloadParams() {}

RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
    const EncodedImage& image,
    const CodecSpecificInfo* codec_specific_info,
    int64_t shared_frame_id) {
  RTPVideoHeader rtp_video_header;
  if (codec_specific_info) {
    PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
                                  &rtp_video_header);
  }
  rtp_video_header.frame_type = image._frameType;
  rtp_video_header.rotation = image.rotation_;
  rtp_video_header.content_type = image.content_type_;
  rtp_video_header.playout_delay = image.playout_delay_;
  rtp_video_header.width = image._encodedWidth;
  rtp_video_header.height = image._encodedHeight;
  rtp_video_header.color_space = image.ColorSpace()
                                     ? absl::make_optional(*image.ColorSpace())
                                     : absl::nullopt;
  rtp_video_header.video_frame_tracking_id = image.VideoFrameTrackingId();
  SetVideoTiming(image, &rtp_video_header.video_timing);

  const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
  const bool first_frame_in_picture =
      (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
          ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
          : true;

  SetCodecSpecific(&rtp_video_header, first_frame_in_picture);

  SetGeneric(codec_specific_info, shared_frame_id, is_keyframe,
             &rtp_video_header);

  return rtp_video_header;
}

uint32_t RtpPayloadParams::ssrc() const {
  return ssrc_;
}

RtpPayloadState RtpPayloadParams::state() const {
  return state_;
}

void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
                                        bool first_frame_in_picture) {
  // Always set picture id. Set tl0_pic_idx iff temporal index is set.
  if (first_frame_in_picture) {
    state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
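    // E.g. a picture_id of 0x7FFF wraps back to 0 (picture_id is 15 bits).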
  }
  if (rtp_video_header->codec == kVideoCodecVP8) {
    auto& vp8_header =
        absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
    vp8_header.pictureId = state_.picture_id;

    if (vp8_header.temporalIdx != kNoTemporalIdx) {
      if (vp8_header.temporalIdx == 0) {
        ++state_.tl0_pic_idx;
      }
      vp8_header.tl0PicIdx = state_.tl0_pic_idx;
    }
  }
  if (rtp_video_header->codec == kVideoCodecVP9) {
    auto& vp9_header =
        absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
    vp9_header.picture_id = state_.picture_id;

    // Note that in the case that we have no temporal layers but we do have
    // spatial layers, packets will carry layering info with a temporal_idx of
    // zero, and we then have to set and increment tl0_pic_idx.
    if (vp9_header.temporal_idx != kNoTemporalIdx ||
        vp9_header.spatial_idx != kNoSpatialIdx) {
      if (first_frame_in_picture &&
          (vp9_header.temporal_idx == 0 ||
           vp9_header.temporal_idx == kNoTemporalIdx)) {
        ++state_.tl0_pic_idx;
      }
      vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
    }
  }
  if (generic_picture_id_experiment_ &&
      rtp_video_header->codec == kVideoCodecGeneric) {
    rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
        .picture_id = state_.picture_id;
  }
}

RTPVideoHeader::GenericDescriptorInfo
RtpPayloadParams::GenericDescriptorFromFrameInfo(
    const GenericFrameInfo& frame_info,
    int64_t frame_id) {
  RTPVideoHeader::GenericDescriptorInfo generic;
  generic.frame_id = frame_id;
  generic.dependencies = dependencies_calculator_.FromBuffersUsage(
      frame_id, frame_info.encoder_buffers);
  generic.chain_diffs =
      chains_calculator_.From(frame_id, frame_info.part_of_chain);
  generic.spatial_index = frame_info.spatial_id;
  generic.temporal_index = frame_info.temporal_id;
  generic.decode_target_indications = frame_info.decode_target_indications;
  generic.active_decode_targets = frame_info.active_decode_targets;
  return generic;
}

void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
                                  int64_t frame_id,
                                  bool is_keyframe,
                                  RTPVideoHeader* rtp_video_header) {
  if (codec_specific_info && codec_specific_info->generic_frame_info &&
      !codec_specific_info->generic_frame_info->encoder_buffers.empty()) {
    if (is_keyframe) {
      // Key frame resets all chains it is in.
      chains_calculator_.Reset(
          codec_specific_info->generic_frame_info->part_of_chain);
    }
    rtp_video_header->generic = GenericDescriptorFromFrameInfo(
        *codec_specific_info->generic_frame_info, frame_id);
    return;
  }

  switch (rtp_video_header->codec) {
    case VideoCodecType::kVideoCodecGeneric:
      GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
      return;
    case VideoCodecType::kVideoCodecVP8:
      if (codec_specific_info) {
        Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
                     is_keyframe, rtp_video_header);
      }
      return;
    case VideoCodecType::kVideoCodecVP9:
      if (codec_specific_info != nullptr) {
        Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
                     *rtp_video_header);
      }
      return;
    case VideoCodecType::kVideoCodecAV1:
      // TODO(philipel): Implement AV1 to generic descriptor.
      return;
    case VideoCodecType::kVideoCodecH264:
      if (codec_specific_info) {
        H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
                      is_keyframe, rtp_video_header);
      }
      return;
    case VideoCodecType::kVideoCodecMultiplex:
      return;
  }
  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
}

absl::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
    const CodecSpecificInfo* codec_specific_info) {
  if (codec_specific_info == nullptr) {
    return absl::nullopt;
  }
  // This helper shouldn't be used when the template structure is specified
  // explicitly.
  RTC_DCHECK(!codec_specific_info->template_structure.has_value());
  switch (codec_specific_info->codecType) {
    case VideoCodecType::kVideoCodecGeneric:
      if (simulate_generic_structure_) {
        return MinimalisticStructure(/*num_spatial_layers=*/1,
                                     /*num_temporal_layers=*/1);
      }
      return absl::nullopt;
    case VideoCodecType::kVideoCodecVP8:
      return MinimalisticStructure(/*num_spatial_layers=*/1,
                                   /*num_temporal_layers=*/kMaxTemporalStreams);
    case VideoCodecType::kVideoCodecVP9: {
      absl::optional<FrameDependencyStructure> structure =
          MinimalisticStructure(
              /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
              /*num_temporal_layers=*/kMaxTemporalStreams);
      const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
      if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
        RenderResolution first_valid;
        RenderResolution last_valid;
        for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
          RenderResolution r(vp9.width[i], vp9.height[i]);
          if (r.Valid()) {
            if (!first_valid.Valid()) {
              first_valid = r;
            }
            last_valid = r;
          }
          structure->resolutions.push_back(r);
        }
        if (!last_valid.Valid()) {
          // No valid resolution found. Do not send resolutions.
          structure->resolutions.clear();
        } else {
          structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
          // VP9 encoder wrapper may disable first few spatial layers by
          // setting invalid resolution (0,0). `structure->resolutions`
          // doesn't support invalid resolution, so reset them to something
          // valid.
          for (RenderResolution& r : structure->resolutions) {
            if (!r.Valid()) {
              r = first_valid;
            }
          }
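          // For illustration (hypothetical values): resolutions
          // {(0,0), (640,360), (1280,720)} become
          // {(640,360), (640,360), (1280,720)} after the loop above.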
        }
      }
      return structure;
    }
    case VideoCodecType::kVideoCodecAV1:
    case VideoCodecType::kVideoCodecH264:
    case VideoCodecType::kVideoCodecMultiplex:
      return absl::nullopt;
  }
  RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
}

void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
                                        bool is_keyframe,
                                        RTPVideoHeader* rtp_video_header) {
  RTPVideoHeader::GenericDescriptorInfo& generic =
      rtp_video_header->generic.emplace();

  generic.frame_id = shared_frame_id;
  generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch);

  if (is_keyframe) {
    generic.chain_diffs.push_back(0);
    last_shared_frame_id_[0].fill(-1);
  } else {
    int64_t frame_id = last_shared_frame_id_[0][0];
    RTC_DCHECK_NE(frame_id, -1);
    RTC_DCHECK_LT(frame_id, shared_frame_id);
    generic.chain_diffs.push_back(shared_frame_id - frame_id);
    generic.dependencies.push_back(frame_id);
  }

  last_shared_frame_id_[0][0] = shared_frame_id;
}

void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
                                     int64_t shared_frame_id,
                                     bool is_keyframe,
                                     RTPVideoHeader* rtp_video_header) {
  const int temporal_index =
      h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0;

  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) {
    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
                           "used with generic frame descriptor.";
    return;
  }

  RTPVideoHeader::GenericDescriptorInfo& generic =
      rtp_video_header->generic.emplace();

  generic.frame_id = shared_frame_id;
  generic.temporal_index = temporal_index;

  if (is_keyframe) {
    RTC_DCHECK_EQ(temporal_index, 0);
    last_shared_frame_id_[/*spatial index*/ 0].fill(-1);
    last_shared_frame_id_[/*spatial index*/ 0][temporal_index] =
        shared_frame_id;
    return;
  }

  if (h264_info.base_layer_sync) {
    int64_t tl0_frame_id = last_shared_frame_id_[/*spatial index*/ 0][0];

    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
      if (last_shared_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) {
        last_shared_frame_id_[/*spatial index*/ 0][i] = -1;
      }
    }

    RTC_DCHECK_GE(tl0_frame_id, 0);
    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
    generic.dependencies.push_back(tl0_frame_id);
  } else {
    for (int i = 0; i <= temporal_index; ++i) {
      int64_t frame_id = last_shared_frame_id_[/*spatial index*/ 0][i];

      if (frame_id != -1) {
        RTC_DCHECK_LT(frame_id, shared_frame_id);
        generic.dependencies.push_back(frame_id);
      }
    }
  }

  last_shared_frame_id_[/*spatial_index*/ 0][temporal_index] = shared_frame_id;
}

void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
                                    int64_t shared_frame_id,
                                    bool is_keyframe,
                                    RTPVideoHeader* rtp_video_header) {
  const auto& vp8_header =
      absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
  const int spatial_index = 0;
  const int temporal_index =
      vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;

  if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
      spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
    RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
                           "used with generic frame descriptor.";
    return;
  }

  RTPVideoHeader::GenericDescriptorInfo& generic =
      rtp_video_header->generic.emplace();

  generic.frame_id = shared_frame_id;
  generic.spatial_index = spatial_index;
  generic.temporal_index = temporal_index;

  // Generate decode target indications.
  RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
  generic.decode_target_indications.resize(kMaxTemporalStreams);
  auto it = std::fill_n(generic.decode_target_indications.begin(),
                        temporal_index, DecodeTargetIndication::kNotPresent);
  std::fill(it, generic.decode_target_indications.end(),
            DecodeTargetIndication::kSwitch);
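  // For illustration (not an additional contract): temporal_index == 1 yields
  // {kNotPresent, kSwitch, kSwitch, ...}, i.e. the first temporal_index
  // targets are marked kNotPresent and every remaining target kSwitch.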

  // Frame dependencies.
  if (vp8_info.useExplicitDependencies) {
    SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe,
                          vp8_header.layerSync, &generic);
  } else {
    SetDependenciesVp8Deprecated(vp8_info, shared_frame_id, is_keyframe,
                                 spatial_index, temporal_index,
                                 vp8_header.layerSync, &generic);
  }

  // Calculate chains.
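  // E.g. (hypothetical ids) if the last temporal-layer-0 frame had shared
  // frame id 100 and this delta frame has id 103, chain_diffs becomes {3}.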
  generic.chain_diffs = {
      (is_keyframe || chain_last_frame_id_[0] < 0)
          ? 0
          : static_cast<int>(shared_frame_id - chain_last_frame_id_[0])};
  if (temporal_index == 0) {
    chain_last_frame_id_[0] = shared_frame_id;
  }
}

void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
                                    int64_t shared_frame_id,
                                    RTPVideoHeader& rtp_video_header) {
  const auto& vp9_header =
      absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
  const int num_spatial_layers = kMaxSimulatedSpatialLayers;
  const int num_active_spatial_layers = vp9_header.num_spatial_layers;
  const int num_temporal_layers = kMaxTemporalStreams;
  static_assert(num_spatial_layers <=
                RtpGenericFrameDescriptor::kMaxSpatialLayers);
  static_assert(num_temporal_layers <=
                RtpGenericFrameDescriptor::kMaxTemporalLayers);
  static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
  static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);

  int spatial_index =
      vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
  int temporal_index =
      vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;

  if (spatial_index >= num_spatial_layers ||
      temporal_index >= num_temporal_layers ||
      num_active_spatial_layers > num_spatial_layers) {
    // Prefer to generate no generic layering rather than an inconsistent one.
    return;
  }

  RTPVideoHeader::GenericDescriptorInfo& result =
      rtp_video_header.generic.emplace();

  result.frame_id = shared_frame_id;
  result.spatial_index = spatial_index;
  result.temporal_index = temporal_index;

  result.decode_target_indications.reserve(num_spatial_layers *
                                           num_temporal_layers);
  for (int sid = 0; sid < num_spatial_layers; ++sid) {
    for (int tid = 0; tid < num_temporal_layers; ++tid) {
      DecodeTargetIndication dti;
      if (sid < spatial_index || tid < temporal_index) {
        dti = DecodeTargetIndication::kNotPresent;
      } else if (spatial_index != sid &&
                 vp9_header.non_ref_for_inter_layer_pred) {
        dti = DecodeTargetIndication::kNotPresent;
      } else if (sid == spatial_index && tid == temporal_index) {
        // Assume that if the frame is decodable, all frames of its own layer
        // are decodable.
        dti = DecodeTargetIndication::kSwitch;
      } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
        dti = DecodeTargetIndication::kSwitch;
      } else if (!vp9_header.inter_pic_predicted) {
        // Key frame or spatial up-switch.
        dti = DecodeTargetIndication::kSwitch;
      } else {
        // Make no other assumptions. That should be safe, though suboptimal.
        // To provide more accurate dtis, the encoder wrapper should fill in
        // CodecSpecificInfo::generic_frame_info.
        dti = DecodeTargetIndication::kRequired;
      }
      result.decode_target_indications.push_back(dti);
    }
  }
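  // For illustration (a consequence of the branches above, not an extra
  // rule): when inter_pic_predicted and non_ref_for_inter_layer_pred are both
  // false (e.g. a key frame), every target with sid >= spatial_index and
  // tid >= temporal_index gets kSwitch and the rest get kNotPresent.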

  // Calculate frame dependencies.
  static constexpr int kPictureDiffLimit = 128;
  if (last_vp9_frame_id_.empty()) {
    // Create the array only if it is ever used.
    last_vp9_frame_id_.resize(kPictureDiffLimit);
  }
  if (vp9_header.inter_layer_predicted && spatial_index > 0) {
    result.dependencies.push_back(
        last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
                          [spatial_index - 1]);
  }
  if (vp9_header.inter_pic_predicted) {
    for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
      // picture_id is a 15-bit number that wraps around. Although underflow
      // may produce a value that exceeds 2^15, that is fine because this code
      // block only uses the last 7 bits of the picture_id.
      uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
      result.dependencies.push_back(
          last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
    }
  }
  last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
      shared_frame_id;

  result.active_decode_targets =
      ((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
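  // For illustration: assuming kMaxTemporalStreams == 4, two active spatial
  // layers give ((uint32_t{1} << 4 * 2) - 1) = 0xFF, i.e. the lowest eight
  // decode targets are marked active.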

  // Calculate chains, assuming the chain includes all frames with
  // temporal_id = 0.
  if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
    // Assume frames without dependencies also reset chains.
    for (int sid = spatial_index; sid < num_spatial_layers; ++sid) {
      chain_last_frame_id_[sid] = -1;
    }
  }
  result.chain_diffs.resize(num_spatial_layers, 0);
  for (int sid = 0; sid < num_active_spatial_layers; ++sid) {
    if (chain_last_frame_id_[sid] == -1) {
      result.chain_diffs[sid] = 0;
      continue;
    }
    result.chain_diffs[sid] = shared_frame_id - chain_last_frame_id_[sid];
  }

  if (temporal_index == 0) {
    chain_last_frame_id_[spatial_index] = shared_frame_id;
    if (!vp9_header.non_ref_for_inter_layer_pred) {
      for (int sid = spatial_index + 1; sid < num_spatial_layers; ++sid) {
        chain_last_frame_id_[sid] = shared_frame_id;
      }
    }
  }
}

void RtpPayloadParams::SetDependenciesVp8Deprecated(
    const CodecSpecificInfoVP8& vp8_info,
    int64_t shared_frame_id,
    bool is_keyframe,
    int spatial_index,
    int temporal_index,
    bool layer_sync,
    RTPVideoHeader::GenericDescriptorInfo* generic) {
  RTC_DCHECK(!vp8_info.useExplicitDependencies);
  RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
  new_version_used_ = false;

  if (is_keyframe) {
    RTC_DCHECK_EQ(temporal_index, 0);
    last_shared_frame_id_[spatial_index].fill(-1);
    last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
    return;
  }

  if (layer_sync) {
    int64_t tl0_frame_id = last_shared_frame_id_[spatial_index][0];

    for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
      if (last_shared_frame_id_[spatial_index][i] < tl0_frame_id) {
        last_shared_frame_id_[spatial_index][i] = -1;
      }
    }

    RTC_DCHECK_GE(tl0_frame_id, 0);
    RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
    generic->dependencies.push_back(tl0_frame_id);
  } else {
    for (int i = 0; i <= temporal_index; ++i) {
      int64_t frame_id = last_shared_frame_id_[spatial_index][i];

      if (frame_id != -1) {
        RTC_DCHECK_LT(frame_id, shared_frame_id);
        generic->dependencies.push_back(frame_id);
      }
    }
  }

  last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
}

void RtpPayloadParams::SetDependenciesVp8New(
    const CodecSpecificInfoVP8& vp8_info,
    int64_t shared_frame_id,
    bool is_keyframe,
    bool layer_sync,
    RTPVideoHeader::GenericDescriptorInfo* generic) {
  RTC_DCHECK(vp8_info.useExplicitDependencies);
  RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
  new_version_used_ = true;

  if (is_keyframe) {
    RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
    buffer_id_to_frame_id_.fill(shared_frame_id);
    return;
  }

  constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;

  RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
  RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
                arraysize(vp8_info.referencedBuffers));

  for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
    const size_t referenced_buffer = vp8_info.referencedBuffers[i];
    RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
    RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());

    const int64_t dependency_frame_id =
        buffer_id_to_frame_id_[referenced_buffer];
    RTC_DCHECK_GE(dependency_frame_id, 0);
    RTC_DCHECK_LT(dependency_frame_id, shared_frame_id);

    const bool is_new_dependency =
        std::find(generic->dependencies.begin(), generic->dependencies.end(),
                  dependency_frame_id) == generic->dependencies.end();
    if (is_new_dependency) {
      generic->dependencies.push_back(dependency_frame_id);
    }
  }

  RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
  for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
    const size_t updated_id = vp8_info.updatedBuffers[i];
    buffer_id_to_frame_id_[updated_id] = shared_frame_id;
  }

  RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
}

}  // namespace webrtc