xref: /aosp_15_r20/external/webrtc/audio/voip/test/audio_egress_unittest.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "audio/voip/audio_egress.h"
12 
13 #include "api/audio_codecs/builtin_audio_encoder_factory.h"
14 #include "api/call/transport.h"
15 #include "api/task_queue/default_task_queue_factory.h"
16 #include "modules/audio_mixer/sine_wave_generator.h"
17 #include "modules/rtp_rtcp/source/rtp_packet_received.h"
18 #include "modules/rtp_rtcp/source/rtp_rtcp_impl2.h"
19 #include "rtc_base/event.h"
20 #include "rtc_base/logging.h"
21 #include "test/gmock.h"
22 #include "test/gtest.h"
23 #include "test/mock_transport.h"
24 #include "test/run_loop.h"
25 
26 namespace webrtc {
27 namespace {
28 
29 using ::testing::Invoke;
30 using ::testing::NiceMock;
31 using ::testing::Unused;
32 
CreateRtpStack(Clock * clock,Transport * transport,uint32_t remote_ssrc)33 std::unique_ptr<ModuleRtpRtcpImpl2> CreateRtpStack(Clock* clock,
34                                                    Transport* transport,
35                                                    uint32_t remote_ssrc) {
36   RtpRtcpInterface::Configuration rtp_config;
37   rtp_config.clock = clock;
38   rtp_config.audio = true;
39   rtp_config.rtcp_report_interval_ms = 5000;
40   rtp_config.outgoing_transport = transport;
41   rtp_config.local_media_ssrc = remote_ssrc;
42   auto rtp_rtcp = ModuleRtpRtcpImpl2::Create(rtp_config);
43   rtp_rtcp->SetSendingMediaStatus(false);
44   rtp_rtcp->SetRTCPStatus(RtcpMode::kCompound);
45   return rtp_rtcp;
46 }
47 
// Level handed to the fixture's sine wave generator. The input level test
// expects AudioEgress to report this same value.
constexpr int16_t kAudioLevel{3004};
49 
50 // AudioEgressTest configures audio egress by using Rtp Stack, fake clock,
51 // and task queue factory.  Encoder factory is needed to create codec and
52 // configure the RTP stack in audio egress.
53 class AudioEgressTest : public ::testing::Test {
54  public:
55   static constexpr uint16_t kSeqNum = 12345;
56   static constexpr uint64_t kStartTime = 123456789;
57   static constexpr uint32_t kRemoteSsrc = 0xDEADBEEF;
58   const SdpAudioFormat kPcmuFormat = {"pcmu", 8000, 1};
59 
AudioEgressTest()60   AudioEgressTest()
61       : fake_clock_(kStartTime), wave_generator_(1000.0, kAudioLevel) {
62     task_queue_factory_ = CreateDefaultTaskQueueFactory();
63     encoder_factory_ = CreateBuiltinAudioEncoderFactory();
64   }
65 
66   // Prepare test on audio egress by using PCMu codec with specific
67   // sequence number and its status to be running.
SetUp()68   void SetUp() override {
69     rtp_rtcp_ = CreateRtpStack(&fake_clock_, &transport_, kRemoteSsrc);
70     egress_ = std::make_unique<AudioEgress>(rtp_rtcp_.get(), &fake_clock_,
71                                             task_queue_factory_.get());
72     constexpr int kPcmuPayload = 0;
73     egress_->SetEncoder(kPcmuPayload, kPcmuFormat,
74                         encoder_factory_->MakeAudioEncoder(
75                             kPcmuPayload, kPcmuFormat, absl::nullopt));
76     egress_->StartSend();
77     rtp_rtcp_->SetSequenceNumber(kSeqNum);
78     rtp_rtcp_->SetSendingStatus(true);
79   }
80 
81   // Make sure we have shut down rtp stack and reset egress for each test.
TearDown()82   void TearDown() override {
83     egress_->StopSend();
84     rtp_rtcp_->SetSendingStatus(false);
85     egress_.reset();
86     rtp_rtcp_.reset();
87   }
88 
89   // Create an audio frame prepared for pcmu encoding. Timestamp is
90   // increased per RTP specification which is the number of samples it contains.
91   // Wave generator writes sine wave which has expected high level set
92   // by kAudioLevel.
GetAudioFrame(int order)93   std::unique_ptr<AudioFrame> GetAudioFrame(int order) {
94     auto frame = std::make_unique<AudioFrame>();
95     frame->sample_rate_hz_ = kPcmuFormat.clockrate_hz;
96     frame->samples_per_channel_ = kPcmuFormat.clockrate_hz / 100;  // 10 ms.
97     frame->num_channels_ = kPcmuFormat.num_channels;
98     frame->timestamp_ = frame->samples_per_channel_ * order;
99     wave_generator_.GenerateNextFrame(frame.get());
100     return frame;
101   }
102 
103   test::RunLoop run_loop_;
104   // SimulatedClock doesn't directly affect this testcase as the the
105   // AudioFrame's timestamp is driven by GetAudioFrame.
106   SimulatedClock fake_clock_;
107   NiceMock<MockTransport> transport_;
108   SineWaveGenerator wave_generator_;
109   std::unique_ptr<ModuleRtpRtcpImpl2> rtp_rtcp_;
110   std::unique_ptr<TaskQueueFactory> task_queue_factory_;
111   rtc::scoped_refptr<AudioEncoderFactory> encoder_factory_;
112   std::unique_ptr<AudioEgress> egress_;
113 };
114 
// SetUp() calls StartSend(), so the egress must report that it is sending
// until StopSend() is called.
TEST_F(AudioEgressTest, SendingStatusAfterStartAndStop) {
  const bool sending_after_setup = egress_->IsSending();
  EXPECT_TRUE(sending_after_setup);

  egress_->StopSend();

  const bool sending_after_stop = egress_->IsSending();
  EXPECT_FALSE(sending_after_stop);
}
120 
// Feeds sine wave frames into a muted egress and verifies that the payload of
// the last RTP packet handed to the transport consists only of the byte 255.
TEST_F(AudioEgressTest, ProcessAudioWithMute) {
  constexpr int kExpected = 10;
  rtc::Event event;
  int rtp_count = 0;
  bool all_parsed = true;
  RtpPacketReceived rtp;
  // Keeps the most recently sent packet in `rtp`, remembers whether every
  // packet could be parsed, and signals `event` once `kExpected` packets have
  // been seen.
  auto rtp_sent = [&](const uint8_t* packet, size_t length, Unused) {
    if (!rtp.Parse(packet, length)) {
      all_parsed = false;
    }
    if (++rtp_count == kExpected) {
      event.Set();
    }
    return true;
  };

  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent));

  egress_->SetMute(true);

  // Two 10 ms audio frames will result in rtp packet with ptime 20.
  for (int i = 0; i < kExpected * 2; ++i) {
    egress_->SendAudioData(GetAudioFrame(i));
    fake_clock_.AdvanceTimeMilliseconds(10);
  }

  event.Wait(TimeDelta::Seconds(1));
  EXPECT_EQ(rtp_count, kExpected);

  // Stop here if a packet failed to parse: the byte checks below would then
  // inspect a packet that does not reflect what was sent.
  ASSERT_TRUE(all_parsed);

  // A silenced PCMu payload is expected to contain only the value 255.
  RTPHeader header;
  rtp.GetHeader(&header);
  const size_t payload_length = rtp.size() - header.headerLength;
  const size_t payload_data_length = payload_length - header.paddingLength;
  // Guard against the loop passing vacuously on an empty payload.
  ASSERT_GT(payload_data_length, 0u);
  const uint8_t* payload = rtp.data() + header.headerLength;
  for (size_t i = 0; i < payload_data_length; ++i) {
    EXPECT_EQ(payload[i], 255);
  }
}
158 
// Feeds sine wave frames into an unmuted egress and verifies that no byte in
// the payload of the last RTP packet handed to the transport equals 255.
TEST_F(AudioEgressTest, ProcessAudioWithSineWave) {
  constexpr int kExpected = 10;
  rtc::Event event;
  int rtp_count = 0;
  bool all_parsed = true;
  RtpPacketReceived rtp;
  // Keeps the most recently sent packet in `rtp`, remembers whether every
  // packet could be parsed, and signals `event` once `kExpected` packets have
  // been seen.
  auto rtp_sent = [&](const uint8_t* packet, size_t length, Unused) {
    if (!rtp.Parse(packet, length)) {
      all_parsed = false;
    }
    if (++rtp_count == kExpected) {
      event.Set();
    }
    return true;
  };

  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(rtp_sent));

  // Two 10 ms audio frames will result in rtp packet with ptime 20.
  for (int i = 0; i < kExpected * 2; ++i) {
    egress_->SendAudioData(GetAudioFrame(i));
    fake_clock_.AdvanceTimeMilliseconds(10);
  }

  event.Wait(TimeDelta::Seconds(1));
  EXPECT_EQ(rtp_count, kExpected);

  // Stop here if a packet failed to parse: the byte checks below would then
  // inspect a packet that does not reflect what was sent.
  ASSERT_TRUE(all_parsed);

  // A PCMu payload carrying the sine wave is expected to contain no 255 byte.
  RTPHeader header;
  rtp.GetHeader(&header);
  const size_t payload_length = rtp.size() - header.headerLength;
  const size_t payload_data_length = payload_length - header.paddingLength;
  // Guard against the loop passing vacuously on an empty payload.
  ASSERT_GT(payload_data_length, 0u);
  const uint8_t* payload = rtp.data() + header.headerLength;
  for (size_t i = 0; i < payload_data_length; ++i) {
    EXPECT_NE(payload[i], 255);
  }
}
194 
// Sends audio until `kExpected` packets reached the transport, stops the
// egress, and then keeps feeding frames to exercise the shutdown path.
TEST_F(AudioEgressTest, SkipAudioEncodingAfterStopSend) {
  constexpr int kExpected = 10;
  rtc::Event event;
  int rtp_count = 0;

  // Counts outgoing packets and signals `event` at the `kExpected`-th one.
  auto on_rtp = [&](const uint8_t*, size_t, Unused) {
    ++rtp_count;
    if (rtp_count == kExpected) {
      event.Set();
    }
    return true;
  };

  // Feeds `frame_count` consecutive 10 ms frames, advancing the fake clock by
  // 10 ms after each one.
  auto feed_frames = [&](int frame_count) {
    for (int index = 0; index < frame_count; ++index) {
      egress_->SendAudioData(GetAudioFrame(index));
      fake_clock_.AdvanceTimeMilliseconds(10);
    }
  };

  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(on_rtp));

  // Two 10 ms audio frames will result in rtp packet with ptime 20.
  feed_frames(kExpected * 2);

  event.Wait(TimeDelta::Seconds(1));
  EXPECT_EQ(rtp_count, kExpected);

  // Now stop send and yet feed more data.
  egress_->StopSend();

  // It should be safe to exit the test case while encoder_queue_ has
  // outstanding data to process. We are making sure that this doesn't
  // result in crashes or sanitizer errors due to remaining data.
  feed_frames(kExpected * 2);
}
228 
// Verifies that GetEncoderFormat() reports the PCMu format installed by
// SetUp() and, after SetEncoder() is called with an Opus encoder, the Opus
// format.
TEST_F(AudioEgressTest, ChangeEncoderFromPcmuToOpus) {
  absl::optional<SdpAudioFormat> pcmu = egress_->GetEncoderFormat();
  // Fatal check: the dereferences below are undefined on an empty optional.
  ASSERT_TRUE(pcmu.has_value());
  EXPECT_EQ(pcmu->clockrate_hz, kPcmuFormat.clockrate_hz);
  EXPECT_EQ(pcmu->num_channels, kPcmuFormat.num_channels);

  constexpr int kOpusPayload = 120;
  const SdpAudioFormat kOpusFormat = {"opus", 48000, 2};

  egress_->SetEncoder(kOpusPayload, kOpusFormat,
                      encoder_factory_->MakeAudioEncoder(
                          kOpusPayload, kOpusFormat, absl::nullopt));

  absl::optional<SdpAudioFormat> opus = egress_->GetEncoderFormat();
  // Fatal for the same reason as above.
  ASSERT_TRUE(opus.has_value());
  EXPECT_EQ(opus->clockrate_hz, kOpusFormat.clockrate_hz);
  EXPECT_EQ(opus->num_channels, kOpusFormat.num_channels);
}
247 
// Queues one telephone event and checks that exactly `kExpected` DTMF packets
// with consecutive sequence numbers reach the transport while audio is fed.
TEST_F(AudioEgressTest, SendDTMF) {
  constexpr int kExpected = 7;
  constexpr int kPayloadType = 100;
  constexpr int kDurationMs = 100;
  constexpr int kSampleRate = 8000;
  constexpr int kEvent = 3;

  egress_->RegisterTelephoneEventType(kPayloadType, kSampleRate);
  // 100 ms duration will produce total 7 DTMF
  // 1 @ 20 ms, 2 @ 40 ms, 3 @ 60 ms, 4 @ 80 ms
  // 5, 6, 7 @ 100 ms (last one sends 3 dtmf)
  egress_->SendTelephoneEvent(kEvent, kDurationMs);

  rtc::Event event;
  int dtmf_count = 0;

  // True when `packet` is the next expected DTMF packet: telephone event
  // payload type, the next sequence number, no padding, the marker bit set
  // only on the very first DTMF packet, and the fixture's SSRC.
  auto is_dtmf = [&](const RtpPacketReceived& packet) {
    if (packet.PayloadType() != kPayloadType) {
      return false;
    }
    if (packet.SequenceNumber() != kSeqNum + dtmf_count) {
      return false;
    }
    if (packet.padding_size() != 0) {
      return false;
    }
    const bool first_dtmf = (dtmf_count == 0);
    if (packet.Marker() != first_dtmf) {
      return false;
    }
    return packet.Ssrc() == kRemoteSsrc;
  };

  // It's possible that we may have actual audio RTP packets along with
  // DTMF packets.  We are only interested in the exact number of DTMF
  // packets rtp stack is emitting.
  auto on_rtp = [&](const uint8_t* data, size_t size, Unused) {
    RtpPacketReceived parsed;
    parsed.Parse(data, size);
    if (!is_dtmf(parsed)) {
      return true;
    }
    if (++dtmf_count == kExpected) {
      event.Set();
    }
    return true;
  };

  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(on_rtp));

  // Two 10 ms audio frames will result in rtp packet with ptime 20.
  for (int index = 0; index < kExpected * 2; ++index) {
    egress_->SendAudioData(GetAudioFrame(index));
    fake_clock_.AdvanceTimeMilliseconds(10);
  }

  event.Wait(TimeDelta::Seconds(1));
  EXPECT_EQ(dtmf_count, kExpected);
}
293 
// Feeds twelve 10 ms sine wave frames and checks the input level, total
// energy and total duration reported by the egress afterwards.
TEST_F(AudioEgressTest, TestAudioInputLevelAndEnergyDuration) {
  // Per audio_level's kUpdateFrequency, we need more than 10 audio samples to
  // get audio level from input source.
  constexpr int kExpected = 6;
  constexpr double kExpectedEnergy = 0.00016809565587789564;
  constexpr double kExpectedDuration = 0.11999999999999998;

  rtc::Event event;
  int rtp_count = 0;

  // Counts outgoing packets and signals `event` at the `kExpected`-th one.
  auto on_rtp = [&](const uint8_t*, size_t, Unused) {
    ++rtp_count;
    if (rtp_count == kExpected) {
      event.Set();
    }
    return true;
  };

  EXPECT_CALL(transport_, SendRtp).WillRepeatedly(Invoke(on_rtp));

  // Two 10 ms audio frames will result in rtp packet with ptime 20.
  for (int index = 0; index < kExpected * 2; ++index) {
    egress_->SendAudioData(GetAudioFrame(index));
    fake_clock_.AdvanceTimeMilliseconds(10);
  }

  event.Wait(/*give_up_after=*/TimeDelta::Seconds(1));
  EXPECT_EQ(rtp_count, kExpected);

  EXPECT_EQ(egress_->GetInputAudioLevel(), kAudioLevel);
  EXPECT_DOUBLE_EQ(egress_->GetInputTotalEnergy(), kExpectedEnergy);
  EXPECT_DOUBLE_EQ(egress_->GetInputTotalDuration(), kExpectedDuration);
}
325 
326 }  // namespace
327 }  // namespace webrtc
328