1 /*
2 * Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_mixer/frame_combiner.h"
12
13 #include <algorithm>
14 #include <array>
15 #include <cstdint>
16 #include <iterator>
17 #include <memory>
18 #include <string>
19 #include <utility>
20 #include <vector>
21
22 #include "api/array_view.h"
23 #include "api/rtp_packet_info.h"
24 #include "api/rtp_packet_infos.h"
25 #include "common_audio/include/audio_util.h"
26 #include "modules/audio_mixer/audio_frame_manipulator.h"
27 #include "modules/audio_mixer/audio_mixer_impl.h"
28 #include "modules/audio_processing/include/audio_frame_view.h"
29 #include "modules/audio_processing/include/audio_processing.h"
30 #include "modules/audio_processing/logging/apm_data_dumper.h"
31 #include "rtc_base/arraysize.h"
32 #include "rtc_base/checks.h"
33 #include "rtc_base/numerics/safe_conversions.h"
34 #include "system_wrappers/include/metrics.h"
35
36 namespace webrtc {
37 namespace {
38
39 using MixingBuffer =
40 std::array<std::array<float, FrameCombiner::kMaximumChannelSize>,
41 FrameCombiner::kMaximumNumberOfChannels>;
42
SetAudioFrameFields(rtc::ArrayView<const AudioFrame * const> mix_list,size_t number_of_channels,int sample_rate,size_t number_of_streams,AudioFrame * audio_frame_for_mixing)43 void SetAudioFrameFields(rtc::ArrayView<const AudioFrame* const> mix_list,
44 size_t number_of_channels,
45 int sample_rate,
46 size_t number_of_streams,
47 AudioFrame* audio_frame_for_mixing) {
48 const size_t samples_per_channel = static_cast<size_t>(
49 (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);
50
51 // TODO(minyue): Issue bugs.webrtc.org/3390.
52 // Audio frame timestamp. The 'timestamp_' field is set to dummy
53 // value '0', because it is only supported in the one channel case and
54 // is then updated in the helper functions.
55 audio_frame_for_mixing->UpdateFrame(
56 0, nullptr, samples_per_channel, sample_rate, AudioFrame::kUndefined,
57 AudioFrame::kVadUnknown, number_of_channels);
58
59 if (mix_list.empty()) {
60 audio_frame_for_mixing->elapsed_time_ms_ = -1;
61 } else {
62 audio_frame_for_mixing->timestamp_ = mix_list[0]->timestamp_;
63 audio_frame_for_mixing->elapsed_time_ms_ = mix_list[0]->elapsed_time_ms_;
64 audio_frame_for_mixing->ntp_time_ms_ = mix_list[0]->ntp_time_ms_;
65 std::vector<RtpPacketInfo> packet_infos;
66 for (const auto& frame : mix_list) {
67 audio_frame_for_mixing->timestamp_ =
68 std::min(audio_frame_for_mixing->timestamp_, frame->timestamp_);
69 audio_frame_for_mixing->ntp_time_ms_ =
70 std::min(audio_frame_for_mixing->ntp_time_ms_, frame->ntp_time_ms_);
71 audio_frame_for_mixing->elapsed_time_ms_ = std::max(
72 audio_frame_for_mixing->elapsed_time_ms_, frame->elapsed_time_ms_);
73 packet_infos.insert(packet_infos.end(), frame->packet_infos_.begin(),
74 frame->packet_infos_.end());
75 }
76 audio_frame_for_mixing->packet_infos_ =
77 RtpPacketInfos(std::move(packet_infos));
78 }
79 }
80
MixFewFramesWithNoLimiter(rtc::ArrayView<const AudioFrame * const> mix_list,AudioFrame * audio_frame_for_mixing)81 void MixFewFramesWithNoLimiter(rtc::ArrayView<const AudioFrame* const> mix_list,
82 AudioFrame* audio_frame_for_mixing) {
83 if (mix_list.empty()) {
84 audio_frame_for_mixing->Mute();
85 return;
86 }
87 RTC_DCHECK_LE(mix_list.size(), 1);
88 std::copy(mix_list[0]->data(),
89 mix_list[0]->data() +
90 mix_list[0]->num_channels_ * mix_list[0]->samples_per_channel_,
91 audio_frame_for_mixing->mutable_data());
92 }
93
MixToFloatFrame(rtc::ArrayView<const AudioFrame * const> mix_list,size_t samples_per_channel,size_t number_of_channels,MixingBuffer * mixing_buffer)94 void MixToFloatFrame(rtc::ArrayView<const AudioFrame* const> mix_list,
95 size_t samples_per_channel,
96 size_t number_of_channels,
97 MixingBuffer* mixing_buffer) {
98 RTC_DCHECK_LE(samples_per_channel, FrameCombiner::kMaximumChannelSize);
99 RTC_DCHECK_LE(number_of_channels, FrameCombiner::kMaximumNumberOfChannels);
100 // Clear the mixing buffer.
101 *mixing_buffer = {};
102
103 // Convert to FloatS16 and mix.
104 for (size_t i = 0; i < mix_list.size(); ++i) {
105 const AudioFrame* const frame = mix_list[i];
106 const int16_t* const frame_data = frame->data();
107 for (size_t j = 0; j < std::min(number_of_channels,
108 FrameCombiner::kMaximumNumberOfChannels);
109 ++j) {
110 for (size_t k = 0; k < std::min(samples_per_channel,
111 FrameCombiner::kMaximumChannelSize);
112 ++k) {
113 (*mixing_buffer)[j][k] += frame_data[number_of_channels * k + j];
114 }
115 }
116 }
117 }
118
RunLimiter(AudioFrameView<float> mixing_buffer_view,Limiter * limiter)119 void RunLimiter(AudioFrameView<float> mixing_buffer_view, Limiter* limiter) {
120 const size_t sample_rate = mixing_buffer_view.samples_per_channel() * 1000 /
121 AudioMixerImpl::kFrameDurationInMs;
122 // TODO(alessiob): Avoid calling SetSampleRate every time.
123 limiter->SetSampleRate(sample_rate);
124 limiter->Process(mixing_buffer_view);
125 }
126
127 // Both interleaves and rounds.
InterleaveToAudioFrame(AudioFrameView<const float> mixing_buffer_view,AudioFrame * audio_frame_for_mixing)128 void InterleaveToAudioFrame(AudioFrameView<const float> mixing_buffer_view,
129 AudioFrame* audio_frame_for_mixing) {
130 const size_t number_of_channels = mixing_buffer_view.num_channels();
131 const size_t samples_per_channel = mixing_buffer_view.samples_per_channel();
132 int16_t* const mixing_data = audio_frame_for_mixing->mutable_data();
133 // Put data in the result frame.
134 for (size_t i = 0; i < number_of_channels; ++i) {
135 for (size_t j = 0; j < samples_per_channel; ++j) {
136 mixing_data[number_of_channels * j + i] =
137 FloatS16ToS16(mixing_buffer_view.channel(i)[j]);
138 }
139 }
140 }
141 } // namespace
142
143 constexpr size_t FrameCombiner::kMaximumNumberOfChannels;
144 constexpr size_t FrameCombiner::kMaximumChannelSize;
145
FrameCombiner(bool use_limiter)146 FrameCombiner::FrameCombiner(bool use_limiter)
147 : data_dumper_(new ApmDataDumper(0)),
148 mixing_buffer_(
149 std::make_unique<std::array<std::array<float, kMaximumChannelSize>,
150 kMaximumNumberOfChannels>>()),
151 limiter_(static_cast<size_t>(48000), data_dumper_.get(), "AudioMixer"),
152 use_limiter_(use_limiter) {
153 static_assert(kMaximumChannelSize * kMaximumNumberOfChannels <=
154 AudioFrame::kMaxDataSizeSamples,
155 "");
156 }
157
158 FrameCombiner::~FrameCombiner() = default;
159
Combine(rtc::ArrayView<AudioFrame * const> mix_list,size_t number_of_channels,int sample_rate,size_t number_of_streams,AudioFrame * audio_frame_for_mixing)160 void FrameCombiner::Combine(rtc::ArrayView<AudioFrame* const> mix_list,
161 size_t number_of_channels,
162 int sample_rate,
163 size_t number_of_streams,
164 AudioFrame* audio_frame_for_mixing) {
165 RTC_DCHECK(audio_frame_for_mixing);
166
167 SetAudioFrameFields(mix_list, number_of_channels, sample_rate,
168 number_of_streams, audio_frame_for_mixing);
169
170 const size_t samples_per_channel = static_cast<size_t>(
171 (sample_rate * webrtc::AudioMixerImpl::kFrameDurationInMs) / 1000);
172
173 for (const auto* frame : mix_list) {
174 RTC_DCHECK_EQ(samples_per_channel, frame->samples_per_channel_);
175 RTC_DCHECK_EQ(sample_rate, frame->sample_rate_hz_);
176 }
177
178 // The 'num_channels_' field of frames in 'mix_list' could be
179 // different from 'number_of_channels'.
180 for (auto* frame : mix_list) {
181 RemixFrame(number_of_channels, frame);
182 }
183
184 if (number_of_streams <= 1) {
185 MixFewFramesWithNoLimiter(mix_list, audio_frame_for_mixing);
186 return;
187 }
188
189 MixToFloatFrame(mix_list, samples_per_channel, number_of_channels,
190 mixing_buffer_.get());
191
192 const size_t output_number_of_channels =
193 std::min(number_of_channels, kMaximumNumberOfChannels);
194 const size_t output_samples_per_channel =
195 std::min(samples_per_channel, kMaximumChannelSize);
196
197 // Put float data in an AudioFrameView.
198 std::array<float*, kMaximumNumberOfChannels> channel_pointers{};
199 for (size_t i = 0; i < output_number_of_channels; ++i) {
200 channel_pointers[i] = &(*mixing_buffer_.get())[i][0];
201 }
202 AudioFrameView<float> mixing_buffer_view(&channel_pointers[0],
203 output_number_of_channels,
204 output_samples_per_channel);
205
206 if (use_limiter_) {
207 RunLimiter(mixing_buffer_view, &limiter_);
208 }
209
210 InterleaveToAudioFrame(mixing_buffer_view, audio_frame_for_mixing);
211 }
212
213 } // namespace webrtc
214