xref: /aosp_15_r20/external/webrtc/call/rtp_payload_params.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "call/rtp_payload_params.h"
12 
13 #include <stddef.h>
14 
15 #include <algorithm>
16 
17 #include "absl/container/inlined_vector.h"
18 #include "absl/strings/match.h"
19 #include "absl/types/variant.h"
20 #include "api/video/video_timing.h"
21 #include "modules/video_coding/codecs/h264/include/h264_globals.h"
22 #include "modules/video_coding/codecs/interface/common_constants.h"
23 #include "modules/video_coding/codecs/vp8/include/vp8_globals.h"
24 #include "modules/video_coding/codecs/vp9/include/vp9_globals.h"
25 #include "modules/video_coding/frame_dependencies_calculator.h"
26 #include "rtc_base/arraysize.h"
27 #include "rtc_base/checks.h"
28 #include "rtc_base/logging.h"
29 #include "rtc_base/random.h"
30 #include "rtc_base/time_utils.h"
31 
32 namespace webrtc {
33 namespace {
34 
// Number of spatial layers assumed when a generic dependency structure is
// simulated for VP9 streams in this file.
constexpr int kMaxSimulatedSpatialLayers{3};
36 
PopulateRtpWithCodecSpecifics(const CodecSpecificInfo & info,absl::optional<int> spatial_index,RTPVideoHeader * rtp)37 void PopulateRtpWithCodecSpecifics(const CodecSpecificInfo& info,
38                                    absl::optional<int> spatial_index,
39                                    RTPVideoHeader* rtp) {
40   rtp->codec = info.codecType;
41   rtp->is_last_frame_in_picture = info.end_of_picture;
42   switch (info.codecType) {
43     case kVideoCodecVP8: {
44       auto& vp8_header = rtp->video_type_header.emplace<RTPVideoHeaderVP8>();
45       vp8_header.InitRTPVideoHeaderVP8();
46       vp8_header.nonReference = info.codecSpecific.VP8.nonReference;
47       vp8_header.temporalIdx = info.codecSpecific.VP8.temporalIdx;
48       vp8_header.layerSync = info.codecSpecific.VP8.layerSync;
49       vp8_header.keyIdx = info.codecSpecific.VP8.keyIdx;
50       rtp->simulcastIdx = spatial_index.value_or(0);
51       return;
52     }
53     case kVideoCodecVP9: {
54       auto& vp9_header = rtp->video_type_header.emplace<RTPVideoHeaderVP9>();
55       vp9_header.InitRTPVideoHeaderVP9();
56       vp9_header.inter_pic_predicted =
57           info.codecSpecific.VP9.inter_pic_predicted;
58       vp9_header.flexible_mode = info.codecSpecific.VP9.flexible_mode;
59       vp9_header.ss_data_available = info.codecSpecific.VP9.ss_data_available;
60       vp9_header.non_ref_for_inter_layer_pred =
61           info.codecSpecific.VP9.non_ref_for_inter_layer_pred;
62       vp9_header.temporal_idx = info.codecSpecific.VP9.temporal_idx;
63       vp9_header.temporal_up_switch = info.codecSpecific.VP9.temporal_up_switch;
64       vp9_header.inter_layer_predicted =
65           info.codecSpecific.VP9.inter_layer_predicted;
66       vp9_header.gof_idx = info.codecSpecific.VP9.gof_idx;
67       vp9_header.num_spatial_layers = info.codecSpecific.VP9.num_spatial_layers;
68       vp9_header.first_active_layer = info.codecSpecific.VP9.first_active_layer;
69       if (vp9_header.num_spatial_layers > 1) {
70         vp9_header.spatial_idx = spatial_index.value_or(kNoSpatialIdx);
71       } else {
72         vp9_header.spatial_idx = kNoSpatialIdx;
73       }
74       if (info.codecSpecific.VP9.ss_data_available) {
75         vp9_header.spatial_layer_resolution_present =
76             info.codecSpecific.VP9.spatial_layer_resolution_present;
77         if (info.codecSpecific.VP9.spatial_layer_resolution_present) {
78           for (size_t i = 0; i < info.codecSpecific.VP9.num_spatial_layers;
79                ++i) {
80             vp9_header.width[i] = info.codecSpecific.VP9.width[i];
81             vp9_header.height[i] = info.codecSpecific.VP9.height[i];
82           }
83         }
84         vp9_header.gof.CopyGofInfoVP9(info.codecSpecific.VP9.gof);
85       }
86 
87       vp9_header.num_ref_pics = info.codecSpecific.VP9.num_ref_pics;
88       for (int i = 0; i < info.codecSpecific.VP9.num_ref_pics; ++i) {
89         vp9_header.pid_diff[i] = info.codecSpecific.VP9.p_diff[i];
90       }
91       vp9_header.end_of_picture = info.end_of_picture;
92       return;
93     }
94     case kVideoCodecH264: {
95       auto& h264_header = rtp->video_type_header.emplace<RTPVideoHeaderH264>();
96       h264_header.packetization_mode =
97           info.codecSpecific.H264.packetization_mode;
98       rtp->simulcastIdx = spatial_index.value_or(0);
99       return;
100     }
101     case kVideoCodecMultiplex:
102     case kVideoCodecGeneric:
103       rtp->codec = kVideoCodecGeneric;
104       rtp->simulcastIdx = spatial_index.value_or(0);
105       return;
106     default:
107       return;
108   }
109 }
110 
SetVideoTiming(const EncodedImage & image,VideoSendTiming * timing)111 void SetVideoTiming(const EncodedImage& image, VideoSendTiming* timing) {
112   if (image.timing_.flags == VideoSendTiming::TimingFrameFlags::kInvalid ||
113       image.timing_.flags == VideoSendTiming::TimingFrameFlags::kNotTriggered) {
114     timing->flags = VideoSendTiming::TimingFrameFlags::kInvalid;
115     return;
116   }
117 
118   timing->encode_start_delta_ms = VideoSendTiming::GetDeltaCappedMs(
119       image.capture_time_ms_, image.timing_.encode_start_ms);
120   timing->encode_finish_delta_ms = VideoSendTiming::GetDeltaCappedMs(
121       image.capture_time_ms_, image.timing_.encode_finish_ms);
122   timing->packetization_finish_delta_ms = 0;
123   timing->pacer_exit_delta_ms = 0;
124   timing->network_timestamp_delta_ms = 0;
125   timing->network2_timestamp_delta_ms = 0;
126   timing->flags = image.timing_.flags;
127 }
128 
129 // Returns structure that aligns with simulated generic info. The templates
130 // allow to produce valid dependency descriptor for any stream where
131 // `num_spatial_layers` * `num_temporal_layers` <= 32 (limited by
132 // https://aomediacodec.github.io/av1-rtp-spec/#a82-syntax, see
133 // template_fdiffs()). The set of the templates is not tuned for any paricular
134 // structure thus dependency descriptor would use more bytes on the wire than
135 // with tuned templates.
MinimalisticStructure(int num_spatial_layers,int num_temporal_layers)136 FrameDependencyStructure MinimalisticStructure(int num_spatial_layers,
137                                                int num_temporal_layers) {
138   RTC_DCHECK_LE(num_spatial_layers, DependencyDescriptor::kMaxSpatialIds);
139   RTC_DCHECK_LE(num_temporal_layers, DependencyDescriptor::kMaxTemporalIds);
140   RTC_DCHECK_LE(num_spatial_layers * num_temporal_layers, 32);
141   FrameDependencyStructure structure;
142   structure.num_decode_targets = num_spatial_layers * num_temporal_layers;
143   structure.num_chains = num_spatial_layers;
144   structure.templates.reserve(num_spatial_layers * num_temporal_layers);
145   for (int sid = 0; sid < num_spatial_layers; ++sid) {
146     for (int tid = 0; tid < num_temporal_layers; ++tid) {
147       FrameDependencyTemplate a_template;
148       a_template.spatial_id = sid;
149       a_template.temporal_id = tid;
150       for (int s = 0; s < num_spatial_layers; ++s) {
151         for (int t = 0; t < num_temporal_layers; ++t) {
152           // Prefer kSwitch indication for frames that is part of the decode
153           // target because dependency descriptor information generated in this
154           // class use kSwitch indications more often that kRequired, increasing
155           // the chance of a good (or complete) template match.
156           a_template.decode_target_indications.push_back(
157               sid <= s && tid <= t ? DecodeTargetIndication::kSwitch
158                                    : DecodeTargetIndication::kNotPresent);
159         }
160       }
161       a_template.frame_diffs.push_back(tid == 0 ? num_spatial_layers *
162                                                       num_temporal_layers
163                                                 : num_spatial_layers);
164       a_template.chain_diffs.assign(structure.num_chains, 1);
165       structure.templates.push_back(a_template);
166 
167       structure.decode_target_protected_by_chain.push_back(sid);
168     }
169   }
170   return structure;
171 }
172 }  // namespace
173 
RtpPayloadParams(const uint32_t ssrc,const RtpPayloadState * state,const FieldTrialsView & trials)174 RtpPayloadParams::RtpPayloadParams(const uint32_t ssrc,
175                                    const RtpPayloadState* state,
176                                    const FieldTrialsView& trials)
177     : ssrc_(ssrc),
178       generic_picture_id_experiment_(
179           absl::StartsWith(trials.Lookup("WebRTC-GenericPictureId"),
180                            "Enabled")),
181       simulate_generic_structure_(absl::StartsWith(
182           trials.Lookup("WebRTC-GenericCodecDependencyDescriptor"),
183           "Enabled")) {
184   for (auto& spatial_layer : last_shared_frame_id_)
185     spatial_layer.fill(-1);
186 
187   chain_last_frame_id_.fill(-1);
188   buffer_id_to_frame_id_.fill(-1);
189 
190   Random random(rtc::TimeMicros());
191   state_.picture_id =
192       state ? state->picture_id : (random.Rand<int16_t>() & 0x7FFF);
193   state_.tl0_pic_idx = state ? state->tl0_pic_idx : (random.Rand<uint8_t>());
194 }
195 
196 RtpPayloadParams::RtpPayloadParams(const RtpPayloadParams& other) = default;
197 
~RtpPayloadParams()198 RtpPayloadParams::~RtpPayloadParams() {}
199 
GetRtpVideoHeader(const EncodedImage & image,const CodecSpecificInfo * codec_specific_info,int64_t shared_frame_id)200 RTPVideoHeader RtpPayloadParams::GetRtpVideoHeader(
201     const EncodedImage& image,
202     const CodecSpecificInfo* codec_specific_info,
203     int64_t shared_frame_id) {
204   RTPVideoHeader rtp_video_header;
205   if (codec_specific_info) {
206     PopulateRtpWithCodecSpecifics(*codec_specific_info, image.SpatialIndex(),
207                                   &rtp_video_header);
208   }
209   rtp_video_header.frame_type = image._frameType;
210   rtp_video_header.rotation = image.rotation_;
211   rtp_video_header.content_type = image.content_type_;
212   rtp_video_header.playout_delay = image.playout_delay_;
213   rtp_video_header.width = image._encodedWidth;
214   rtp_video_header.height = image._encodedHeight;
215   rtp_video_header.color_space = image.ColorSpace()
216                                      ? absl::make_optional(*image.ColorSpace())
217                                      : absl::nullopt;
218   rtp_video_header.video_frame_tracking_id = image.VideoFrameTrackingId();
219   SetVideoTiming(image, &rtp_video_header.video_timing);
220 
221   const bool is_keyframe = image._frameType == VideoFrameType::kVideoFrameKey;
222   const bool first_frame_in_picture =
223       (codec_specific_info && codec_specific_info->codecType == kVideoCodecVP9)
224           ? codec_specific_info->codecSpecific.VP9.first_frame_in_picture
225           : true;
226 
227   SetCodecSpecific(&rtp_video_header, first_frame_in_picture);
228 
229   SetGeneric(codec_specific_info, shared_frame_id, is_keyframe,
230              &rtp_video_header);
231 
232   return rtp_video_header;
233 }
234 
ssrc() const235 uint32_t RtpPayloadParams::ssrc() const {
236   return ssrc_;
237 }
238 
state() const239 RtpPayloadState RtpPayloadParams::state() const {
240   return state_;
241 }
242 
SetCodecSpecific(RTPVideoHeader * rtp_video_header,bool first_frame_in_picture)243 void RtpPayloadParams::SetCodecSpecific(RTPVideoHeader* rtp_video_header,
244                                         bool first_frame_in_picture) {
245   // Always set picture id. Set tl0_pic_idx iff temporal index is set.
246   if (first_frame_in_picture) {
247     state_.picture_id = (static_cast<uint16_t>(state_.picture_id) + 1) & 0x7FFF;
248   }
249   if (rtp_video_header->codec == kVideoCodecVP8) {
250     auto& vp8_header =
251         absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
252     vp8_header.pictureId = state_.picture_id;
253 
254     if (vp8_header.temporalIdx != kNoTemporalIdx) {
255       if (vp8_header.temporalIdx == 0) {
256         ++state_.tl0_pic_idx;
257       }
258       vp8_header.tl0PicIdx = state_.tl0_pic_idx;
259     }
260   }
261   if (rtp_video_header->codec == kVideoCodecVP9) {
262     auto& vp9_header =
263         absl::get<RTPVideoHeaderVP9>(rtp_video_header->video_type_header);
264     vp9_header.picture_id = state_.picture_id;
265 
266     // Note that in the case that we have no temporal layers but we do have
267     // spatial layers, packets will carry layering info with a temporal_idx of
268     // zero, and we then have to set and increment tl0_pic_idx.
269     if (vp9_header.temporal_idx != kNoTemporalIdx ||
270         vp9_header.spatial_idx != kNoSpatialIdx) {
271       if (first_frame_in_picture &&
272           (vp9_header.temporal_idx == 0 ||
273            vp9_header.temporal_idx == kNoTemporalIdx)) {
274         ++state_.tl0_pic_idx;
275       }
276       vp9_header.tl0_pic_idx = state_.tl0_pic_idx;
277     }
278   }
279   if (generic_picture_id_experiment_ &&
280       rtp_video_header->codec == kVideoCodecGeneric) {
281     rtp_video_header->video_type_header.emplace<RTPVideoHeaderLegacyGeneric>()
282         .picture_id = state_.picture_id;
283   }
284 }
285 
286 RTPVideoHeader::GenericDescriptorInfo
GenericDescriptorFromFrameInfo(const GenericFrameInfo & frame_info,int64_t frame_id)287 RtpPayloadParams::GenericDescriptorFromFrameInfo(
288     const GenericFrameInfo& frame_info,
289     int64_t frame_id) {
290   RTPVideoHeader::GenericDescriptorInfo generic;
291   generic.frame_id = frame_id;
292   generic.dependencies = dependencies_calculator_.FromBuffersUsage(
293       frame_id, frame_info.encoder_buffers);
294   generic.chain_diffs =
295       chains_calculator_.From(frame_id, frame_info.part_of_chain);
296   generic.spatial_index = frame_info.spatial_id;
297   generic.temporal_index = frame_info.temporal_id;
298   generic.decode_target_indications = frame_info.decode_target_indications;
299   generic.active_decode_targets = frame_info.active_decode_targets;
300   return generic;
301 }
302 
SetGeneric(const CodecSpecificInfo * codec_specific_info,int64_t frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)303 void RtpPayloadParams::SetGeneric(const CodecSpecificInfo* codec_specific_info,
304                                   int64_t frame_id,
305                                   bool is_keyframe,
306                                   RTPVideoHeader* rtp_video_header) {
307   if (codec_specific_info && codec_specific_info->generic_frame_info &&
308       !codec_specific_info->generic_frame_info->encoder_buffers.empty()) {
309     if (is_keyframe) {
310       // Key frame resets all chains it is in.
311       chains_calculator_.Reset(
312           codec_specific_info->generic_frame_info->part_of_chain);
313     }
314     rtp_video_header->generic = GenericDescriptorFromFrameInfo(
315         *codec_specific_info->generic_frame_info, frame_id);
316     return;
317   }
318 
319   switch (rtp_video_header->codec) {
320     case VideoCodecType::kVideoCodecGeneric:
321       GenericToGeneric(frame_id, is_keyframe, rtp_video_header);
322       return;
323     case VideoCodecType::kVideoCodecVP8:
324       if (codec_specific_info) {
325         Vp8ToGeneric(codec_specific_info->codecSpecific.VP8, frame_id,
326                      is_keyframe, rtp_video_header);
327       }
328       return;
329     case VideoCodecType::kVideoCodecVP9:
330       if (codec_specific_info != nullptr) {
331         Vp9ToGeneric(codec_specific_info->codecSpecific.VP9, frame_id,
332                      *rtp_video_header);
333       }
334       return;
335     case VideoCodecType::kVideoCodecAV1:
336       // TODO(philipel): Implement AV1 to generic descriptor.
337       return;
338     case VideoCodecType::kVideoCodecH264:
339       if (codec_specific_info) {
340         H264ToGeneric(codec_specific_info->codecSpecific.H264, frame_id,
341                       is_keyframe, rtp_video_header);
342       }
343       return;
344     case VideoCodecType::kVideoCodecMultiplex:
345       return;
346   }
347   RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
348 }
349 
GenericStructure(const CodecSpecificInfo * codec_specific_info)350 absl::optional<FrameDependencyStructure> RtpPayloadParams::GenericStructure(
351     const CodecSpecificInfo* codec_specific_info) {
352   if (codec_specific_info == nullptr) {
353     return absl::nullopt;
354   }
355   // This helper shouldn't be used when template structure is specified
356   // explicetly.
357   RTC_DCHECK(!codec_specific_info->template_structure.has_value());
358   switch (codec_specific_info->codecType) {
359     case VideoCodecType::kVideoCodecGeneric:
360       if (simulate_generic_structure_) {
361         return MinimalisticStructure(/*num_spatial_layers=*/1,
362                                      /*num_temporal_layer=*/1);
363       }
364       return absl::nullopt;
365     case VideoCodecType::kVideoCodecVP8:
366       return MinimalisticStructure(/*num_spatial_layers=*/1,
367                                    /*num_temporal_layer=*/kMaxTemporalStreams);
368     case VideoCodecType::kVideoCodecVP9: {
369       absl::optional<FrameDependencyStructure> structure =
370           MinimalisticStructure(
371               /*num_spatial_layers=*/kMaxSimulatedSpatialLayers,
372               /*num_temporal_layer=*/kMaxTemporalStreams);
373       const CodecSpecificInfoVP9& vp9 = codec_specific_info->codecSpecific.VP9;
374       if (vp9.ss_data_available && vp9.spatial_layer_resolution_present) {
375         RenderResolution first_valid;
376         RenderResolution last_valid;
377         for (size_t i = 0; i < vp9.num_spatial_layers; ++i) {
378           RenderResolution r(vp9.width[i], vp9.height[i]);
379           if (r.Valid()) {
380             if (!first_valid.Valid()) {
381               first_valid = r;
382             }
383             last_valid = r;
384           }
385           structure->resolutions.push_back(r);
386         }
387         if (!last_valid.Valid()) {
388           // No valid resolution found. Do not send resolutions.
389           structure->resolutions.clear();
390         } else {
391           structure->resolutions.resize(kMaxSimulatedSpatialLayers, last_valid);
392           // VP9 encoder wrapper may disable first few spatial layers by
393           // setting invalid resolution (0,0). `structure->resolutions`
394           // doesn't support invalid resolution, so reset them to something
395           // valid.
396           for (RenderResolution& r : structure->resolutions) {
397             if (!r.Valid()) {
398               r = first_valid;
399             }
400           }
401         }
402       }
403       return structure;
404     }
405     case VideoCodecType::kVideoCodecAV1:
406     case VideoCodecType::kVideoCodecH264:
407     case VideoCodecType::kVideoCodecMultiplex:
408       return absl::nullopt;
409   }
410   RTC_DCHECK_NOTREACHED() << "Unsupported codec.";
411 }
412 
GenericToGeneric(int64_t shared_frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)413 void RtpPayloadParams::GenericToGeneric(int64_t shared_frame_id,
414                                         bool is_keyframe,
415                                         RTPVideoHeader* rtp_video_header) {
416   RTPVideoHeader::GenericDescriptorInfo& generic =
417       rtp_video_header->generic.emplace();
418 
419   generic.frame_id = shared_frame_id;
420   generic.decode_target_indications.push_back(DecodeTargetIndication::kSwitch);
421 
422   if (is_keyframe) {
423     generic.chain_diffs.push_back(0);
424     last_shared_frame_id_[0].fill(-1);
425   } else {
426     int64_t frame_id = last_shared_frame_id_[0][0];
427     RTC_DCHECK_NE(frame_id, -1);
428     RTC_DCHECK_LT(frame_id, shared_frame_id);
429     generic.chain_diffs.push_back(shared_frame_id - frame_id);
430     generic.dependencies.push_back(frame_id);
431   }
432 
433   last_shared_frame_id_[0][0] = shared_frame_id;
434 }
435 
H264ToGeneric(const CodecSpecificInfoH264 & h264_info,int64_t shared_frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)436 void RtpPayloadParams::H264ToGeneric(const CodecSpecificInfoH264& h264_info,
437                                      int64_t shared_frame_id,
438                                      bool is_keyframe,
439                                      RTPVideoHeader* rtp_video_header) {
440   const int temporal_index =
441       h264_info.temporal_idx != kNoTemporalIdx ? h264_info.temporal_idx : 0;
442 
443   if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers) {
444     RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
445                            "used with generic frame descriptor.";
446     return;
447   }
448 
449   RTPVideoHeader::GenericDescriptorInfo& generic =
450       rtp_video_header->generic.emplace();
451 
452   generic.frame_id = shared_frame_id;
453   generic.temporal_index = temporal_index;
454 
455   if (is_keyframe) {
456     RTC_DCHECK_EQ(temporal_index, 0);
457     last_shared_frame_id_[/*spatial index*/ 0].fill(-1);
458     last_shared_frame_id_[/*spatial index*/ 0][temporal_index] =
459         shared_frame_id;
460     return;
461   }
462 
463   if (h264_info.base_layer_sync) {
464     int64_t tl0_frame_id = last_shared_frame_id_[/*spatial index*/ 0][0];
465 
466     for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
467       if (last_shared_frame_id_[/*spatial index*/ 0][i] < tl0_frame_id) {
468         last_shared_frame_id_[/*spatial index*/ 0][i] = -1;
469       }
470     }
471 
472     RTC_DCHECK_GE(tl0_frame_id, 0);
473     RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
474     generic.dependencies.push_back(tl0_frame_id);
475   } else {
476     for (int i = 0; i <= temporal_index; ++i) {
477       int64_t frame_id = last_shared_frame_id_[/*spatial index*/ 0][i];
478 
479       if (frame_id != -1) {
480         RTC_DCHECK_LT(frame_id, shared_frame_id);
481         generic.dependencies.push_back(frame_id);
482       }
483     }
484   }
485 
486   last_shared_frame_id_[/*spatial_index*/ 0][temporal_index] = shared_frame_id;
487 }
488 
Vp8ToGeneric(const CodecSpecificInfoVP8 & vp8_info,int64_t shared_frame_id,bool is_keyframe,RTPVideoHeader * rtp_video_header)489 void RtpPayloadParams::Vp8ToGeneric(const CodecSpecificInfoVP8& vp8_info,
490                                     int64_t shared_frame_id,
491                                     bool is_keyframe,
492                                     RTPVideoHeader* rtp_video_header) {
493   const auto& vp8_header =
494       absl::get<RTPVideoHeaderVP8>(rtp_video_header->video_type_header);
495   const int spatial_index = 0;
496   const int temporal_index =
497       vp8_header.temporalIdx != kNoTemporalIdx ? vp8_header.temporalIdx : 0;
498 
499   if (temporal_index >= RtpGenericFrameDescriptor::kMaxTemporalLayers ||
500       spatial_index >= RtpGenericFrameDescriptor::kMaxSpatialLayers) {
501     RTC_LOG(LS_WARNING) << "Temporal and/or spatial index is too high to be "
502                            "used with generic frame descriptor.";
503     return;
504   }
505 
506   RTPVideoHeader::GenericDescriptorInfo& generic =
507       rtp_video_header->generic.emplace();
508 
509   generic.frame_id = shared_frame_id;
510   generic.spatial_index = spatial_index;
511   generic.temporal_index = temporal_index;
512 
513   // Generate decode target indications.
514   RTC_DCHECK_LT(temporal_index, kMaxTemporalStreams);
515   generic.decode_target_indications.resize(kMaxTemporalStreams);
516   auto it = std::fill_n(generic.decode_target_indications.begin(),
517                         temporal_index, DecodeTargetIndication::kNotPresent);
518   std::fill(it, generic.decode_target_indications.end(),
519             DecodeTargetIndication::kSwitch);
520 
521   // Frame dependencies.
522   if (vp8_info.useExplicitDependencies) {
523     SetDependenciesVp8New(vp8_info, shared_frame_id, is_keyframe,
524                           vp8_header.layerSync, &generic);
525   } else {
526     SetDependenciesVp8Deprecated(vp8_info, shared_frame_id, is_keyframe,
527                                  spatial_index, temporal_index,
528                                  vp8_header.layerSync, &generic);
529   }
530 
531   // Calculate chains.
532   generic.chain_diffs = {
533       (is_keyframe || chain_last_frame_id_[0] < 0)
534           ? 0
535           : static_cast<int>(shared_frame_id - chain_last_frame_id_[0])};
536   if (temporal_index == 0) {
537     chain_last_frame_id_[0] = shared_frame_id;
538   }
539 }
540 
Vp9ToGeneric(const CodecSpecificInfoVP9 & vp9_info,int64_t shared_frame_id,RTPVideoHeader & rtp_video_header)541 void RtpPayloadParams::Vp9ToGeneric(const CodecSpecificInfoVP9& vp9_info,
542                                     int64_t shared_frame_id,
543                                     RTPVideoHeader& rtp_video_header) {
544   const auto& vp9_header =
545       absl::get<RTPVideoHeaderVP9>(rtp_video_header.video_type_header);
546   const int num_spatial_layers = kMaxSimulatedSpatialLayers;
547   const int num_active_spatial_layers = vp9_header.num_spatial_layers;
548   const int num_temporal_layers = kMaxTemporalStreams;
549   static_assert(num_spatial_layers <=
550                 RtpGenericFrameDescriptor::kMaxSpatialLayers);
551   static_assert(num_temporal_layers <=
552                 RtpGenericFrameDescriptor::kMaxTemporalLayers);
553   static_assert(num_spatial_layers <= DependencyDescriptor::kMaxSpatialIds);
554   static_assert(num_temporal_layers <= DependencyDescriptor::kMaxTemporalIds);
555 
556   int spatial_index =
557       vp9_header.spatial_idx != kNoSpatialIdx ? vp9_header.spatial_idx : 0;
558   int temporal_index =
559       vp9_header.temporal_idx != kNoTemporalIdx ? vp9_header.temporal_idx : 0;
560 
561   if (spatial_index >= num_spatial_layers ||
562       temporal_index >= num_temporal_layers ||
563       num_active_spatial_layers > num_spatial_layers) {
564     // Prefer to generate no generic layering than an inconsistent one.
565     return;
566   }
567 
568   RTPVideoHeader::GenericDescriptorInfo& result =
569       rtp_video_header.generic.emplace();
570 
571   result.frame_id = shared_frame_id;
572   result.spatial_index = spatial_index;
573   result.temporal_index = temporal_index;
574 
575   result.decode_target_indications.reserve(num_spatial_layers *
576                                            num_temporal_layers);
577   for (int sid = 0; sid < num_spatial_layers; ++sid) {
578     for (int tid = 0; tid < num_temporal_layers; ++tid) {
579       DecodeTargetIndication dti;
580       if (sid < spatial_index || tid < temporal_index) {
581         dti = DecodeTargetIndication::kNotPresent;
582       } else if (spatial_index != sid &&
583                  vp9_header.non_ref_for_inter_layer_pred) {
584         dti = DecodeTargetIndication::kNotPresent;
585       } else if (sid == spatial_index && tid == temporal_index) {
586         // Assume that if frame is decodable, all of its own layer is decodable.
587         dti = DecodeTargetIndication::kSwitch;
588       } else if (sid == spatial_index && vp9_header.temporal_up_switch) {
589         dti = DecodeTargetIndication::kSwitch;
590       } else if (!vp9_header.inter_pic_predicted) {
591         // Key frame or spatial upswitch
592         dti = DecodeTargetIndication::kSwitch;
593       } else {
594         // Make no other assumptions. That should be safe, though suboptimal.
595         // To provide more accurate dti, encoder wrapper should fill in
596         // CodecSpecificInfo::generic_frame_info
597         dti = DecodeTargetIndication::kRequired;
598       }
599       result.decode_target_indications.push_back(dti);
600     }
601   }
602 
603   // Calculate frame dependencies.
604   static constexpr int kPictureDiffLimit = 128;
605   if (last_vp9_frame_id_.empty()) {
606     // Create the array only if it is ever used.
607     last_vp9_frame_id_.resize(kPictureDiffLimit);
608   }
609   if (vp9_header.inter_layer_predicted && spatial_index > 0) {
610     result.dependencies.push_back(
611         last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit]
612                           [spatial_index - 1]);
613   }
614   if (vp9_header.inter_pic_predicted) {
615     for (size_t i = 0; i < vp9_header.num_ref_pics; ++i) {
616       // picture_id is 15 bit number that wraps around. Though undeflow may
617       // produce picture that exceeds 2^15, it is ok because in this
618       // code block only last 7 bits of the picture_id are used.
619       uint16_t depend_on = vp9_header.picture_id - vp9_header.pid_diff[i];
620       result.dependencies.push_back(
621           last_vp9_frame_id_[depend_on % kPictureDiffLimit][spatial_index]);
622     }
623   }
624   last_vp9_frame_id_[vp9_header.picture_id % kPictureDiffLimit][spatial_index] =
625       shared_frame_id;
626 
627   result.active_decode_targets =
628       ((uint32_t{1} << num_temporal_layers * num_active_spatial_layers) - 1);
629 
630   // Calculate chains, asuming chain includes all frames with temporal_id = 0
631   if (!vp9_header.inter_pic_predicted && !vp9_header.inter_layer_predicted) {
632     // Assume frames without dependencies also reset chains.
633     for (int sid = spatial_index; sid < num_spatial_layers; ++sid) {
634       chain_last_frame_id_[sid] = -1;
635     }
636   }
637   result.chain_diffs.resize(num_spatial_layers, 0);
638   for (int sid = 0; sid < num_active_spatial_layers; ++sid) {
639     if (chain_last_frame_id_[sid] == -1) {
640       result.chain_diffs[sid] = 0;
641       continue;
642     }
643     result.chain_diffs[sid] = shared_frame_id - chain_last_frame_id_[sid];
644   }
645 
646   if (temporal_index == 0) {
647     chain_last_frame_id_[spatial_index] = shared_frame_id;
648     if (!vp9_header.non_ref_for_inter_layer_pred) {
649       for (int sid = spatial_index + 1; sid < num_spatial_layers; ++sid) {
650         chain_last_frame_id_[sid] = shared_frame_id;
651       }
652     }
653   }
654 }
655 
SetDependenciesVp8Deprecated(const CodecSpecificInfoVP8 & vp8_info,int64_t shared_frame_id,bool is_keyframe,int spatial_index,int temporal_index,bool layer_sync,RTPVideoHeader::GenericDescriptorInfo * generic)656 void RtpPayloadParams::SetDependenciesVp8Deprecated(
657     const CodecSpecificInfoVP8& vp8_info,
658     int64_t shared_frame_id,
659     bool is_keyframe,
660     int spatial_index,
661     int temporal_index,
662     bool layer_sync,
663     RTPVideoHeader::GenericDescriptorInfo* generic) {
664   RTC_DCHECK(!vp8_info.useExplicitDependencies);
665   RTC_DCHECK(!new_version_used_.has_value() || !new_version_used_.value());
666   new_version_used_ = false;
667 
668   if (is_keyframe) {
669     RTC_DCHECK_EQ(temporal_index, 0);
670     last_shared_frame_id_[spatial_index].fill(-1);
671     last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
672     return;
673   }
674 
675   if (layer_sync) {
676     int64_t tl0_frame_id = last_shared_frame_id_[spatial_index][0];
677 
678     for (int i = 1; i < RtpGenericFrameDescriptor::kMaxTemporalLayers; ++i) {
679       if (last_shared_frame_id_[spatial_index][i] < tl0_frame_id) {
680         last_shared_frame_id_[spatial_index][i] = -1;
681       }
682     }
683 
684     RTC_DCHECK_GE(tl0_frame_id, 0);
685     RTC_DCHECK_LT(tl0_frame_id, shared_frame_id);
686     generic->dependencies.push_back(tl0_frame_id);
687   } else {
688     for (int i = 0; i <= temporal_index; ++i) {
689       int64_t frame_id = last_shared_frame_id_[spatial_index][i];
690 
691       if (frame_id != -1) {
692         RTC_DCHECK_LT(frame_id, shared_frame_id);
693         generic->dependencies.push_back(frame_id);
694       }
695     }
696   }
697 
698   last_shared_frame_id_[spatial_index][temporal_index] = shared_frame_id;
699 }
700 
SetDependenciesVp8New(const CodecSpecificInfoVP8 & vp8_info,int64_t shared_frame_id,bool is_keyframe,bool layer_sync,RTPVideoHeader::GenericDescriptorInfo * generic)701 void RtpPayloadParams::SetDependenciesVp8New(
702     const CodecSpecificInfoVP8& vp8_info,
703     int64_t shared_frame_id,
704     bool is_keyframe,
705     bool layer_sync,
706     RTPVideoHeader::GenericDescriptorInfo* generic) {
707   RTC_DCHECK(vp8_info.useExplicitDependencies);
708   RTC_DCHECK(!new_version_used_.has_value() || new_version_used_.value());
709   new_version_used_ = true;
710 
711   if (is_keyframe) {
712     RTC_DCHECK_EQ(vp8_info.referencedBuffersCount, 0u);
713     buffer_id_to_frame_id_.fill(shared_frame_id);
714     return;
715   }
716 
717   constexpr size_t kBuffersCountVp8 = CodecSpecificInfoVP8::kBuffersCount;
718 
719   RTC_DCHECK_GT(vp8_info.referencedBuffersCount, 0u);
720   RTC_DCHECK_LE(vp8_info.referencedBuffersCount,
721                 arraysize(vp8_info.referencedBuffers));
722 
723   for (size_t i = 0; i < vp8_info.referencedBuffersCount; ++i) {
724     const size_t referenced_buffer = vp8_info.referencedBuffers[i];
725     RTC_DCHECK_LT(referenced_buffer, kBuffersCountVp8);
726     RTC_DCHECK_LT(referenced_buffer, buffer_id_to_frame_id_.size());
727 
728     const int64_t dependency_frame_id =
729         buffer_id_to_frame_id_[referenced_buffer];
730     RTC_DCHECK_GE(dependency_frame_id, 0);
731     RTC_DCHECK_LT(dependency_frame_id, shared_frame_id);
732 
733     const bool is_new_dependency =
734         std::find(generic->dependencies.begin(), generic->dependencies.end(),
735                   dependency_frame_id) == generic->dependencies.end();
736     if (is_new_dependency) {
737       generic->dependencies.push_back(dependency_frame_id);
738     }
739   }
740 
741   RTC_DCHECK_LE(vp8_info.updatedBuffersCount, kBuffersCountVp8);
742   for (size_t i = 0; i < vp8_info.updatedBuffersCount; ++i) {
743     const size_t updated_id = vp8_info.updatedBuffers[i];
744     buffer_id_to_frame_id_[updated_id] = shared_frame_id;
745   }
746 
747   RTC_DCHECK_LE(buffer_id_to_frame_id_.size(), kBuffersCountVp8);
748 }
749 
750 }  // namespace webrtc
751