adaptive_digital_gain_applier_unittest.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422) - OpenGrok cross reference for /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/adaptive_digital_gain_applier_unittest.cc

/*
 *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc2/adaptive_digital_gain_applier.h"

#include <algorithm>
#include <memory>

#include "common_audio/include/audio_util.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/vector_float_frame.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/gunit.h"

namespace webrtc {
namespace {

constexpr int kMono = 1;
constexpr int kStereo = 2;
constexpr int kFrameLen10ms8kHz = 80;
constexpr int kFrameLen10ms48kHz = 480;

constexpr float kMaxSpeechProbability = 1.0f;

// Constants used in place of estimated noise levels.
constexpr float kNoNoiseDbfs = kMinLevelDbfs;
constexpr float kWithNoiseDbfs = -20.0f;

// Number of additional frames to process in the tests to ensure that the tested
// adaptation processes have converged.
constexpr int kNumExtraFrames = 10;

constexpr float GetMaxGainChangePerFrameDb(
    float max_gain_change_db_per_second) {
  return max_gain_change_db_per_second * kFrameDurationMs / 1000.0f;
}

using AdaptiveDigitalConfig =
    AudioProcessing::Config::GainController2::AdaptiveDigital;

constexpr AdaptiveDigitalConfig kDefaultConfig{};

// Helper to create initialized `AdaptiveDigitalGainApplier` objects.
struct GainApplierHelper {
  GainApplierHelper(const AdaptiveDigitalConfig& config,
                    int sample_rate_hz,
                    int num_channels)
      : apm_data_dumper(0),
        gain_applier(
            std::make_unique<AdaptiveDigitalGainApplier>(&apm_data_dumper,
                                                         config,
                                                         sample_rate_hz,
                                                         num_channels)) {}
  ApmDataDumper apm_data_dumper;
  std::unique_ptr<AdaptiveDigitalGainApplier> gain_applier;
};

// Returns a `FrameInfo` sample to simulate noiseless speech detected with
// maximum probability and with level, headroom and limiter envelope chosen
// so that the resulting gain equals the default initial adaptive digital gain
// i.e., no gain adaptation is expected.
AdaptiveDigitalGainApplier::FrameInfo GetFrameInfoToNotAdapt(
    const AdaptiveDigitalConfig& config) {
  AdaptiveDigitalGainApplier::FrameInfo info;
  info.speech_probability = kMaxSpeechProbability;
  info.speech_level_dbfs = -config.initial_gain_db - config.headroom_db;
  info.speech_level_reliable = true;
  info.noise_rms_dbfs = kNoNoiseDbfs;
  info.headroom_db = config.headroom_db;
  info.limiter_envelope_dbfs = -2.0f;
  return info;
}

TEST(GainController2AdaptiveGainApplier, GainApplierShouldNotCrash) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);
  // Make one call with reasonable audio level values and settings.
  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
  helper.gain_applier->Process(GetFrameInfoToNotAdapt(kDefaultConfig),
                               fake_audio.float_frame_view());
}

// Checks that the maximum allowed gain is applied.
TEST(GainController2AdaptiveGainApplier, MaxGainApplied) {
  constexpr int kNumFramesToAdapt =
      static_cast<int>(kDefaultConfig.max_gain_db /
                       GetMaxGainChangePerFrameDb(
                           kDefaultConfig.max_gain_change_db_per_second)) +
      kNumExtraFrames;

  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);
  AdaptiveDigitalGainApplier::FrameInfo info =
      GetFrameInfoToNotAdapt(kDefaultConfig);
  info.speech_level_dbfs = -60.0f;
  float applied_gain;
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
    helper.gain_applier->Process(info, fake_audio.float_frame_view());
    applied_gain = fake_audio.float_frame_view().channel(0)[0];
  }
  const float applied_gain_db = 20.0f * std::log10f(applied_gain);
  EXPECT_NEAR(applied_gain_db, kDefaultConfig.max_gain_db, 0.1f);
}

TEST(GainController2AdaptiveGainApplier, GainDoesNotChangeFast) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/8000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  constexpr float kMaxGainChangeDbPerFrame =
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
  constexpr int kNumFramesToAdapt =
      static_cast<int>(initial_level_dbfs / kMaxGainChangeDbPerFrame) +
      kNumExtraFrames;

  const float max_change_per_frame_linear = DbToRatio(kMaxGainChangeDbPerFrame);

  float last_gain_linear = 1.f;
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
    AdaptiveDigitalGainApplier::FrameInfo info =
        GetFrameInfoToNotAdapt(kDefaultConfig);
    info.speech_level_dbfs = initial_level_dbfs;
    helper.gain_applier->Process(info, fake_audio.float_frame_view());
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              max_change_per_frame_linear);
    last_gain_linear = current_gain_linear;
  }

  // Check that the same is true when gain decreases as well.
  for (int i = 0; i < kNumFramesToAdapt; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, 1.0f);
    AdaptiveDigitalGainApplier::FrameInfo info =
        GetFrameInfoToNotAdapt(kDefaultConfig);
    info.speech_level_dbfs = 0.f;
    helper.gain_applier->Process(info, fake_audio.float_frame_view());
    float current_gain_linear = fake_audio.float_frame_view().channel(0)[0];
    EXPECT_LE(std::abs(current_gain_linear - last_gain_linear),
              max_change_per_frame_linear);
    last_gain_linear = current_gain_linear;
  }
}

TEST(GainController2AdaptiveGainApplier, GainIsRampedInAFrame) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);

  constexpr float initial_level_dbfs = -25.0f;

  VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
  AdaptiveDigitalGainApplier::FrameInfo info =
      GetFrameInfoToNotAdapt(kDefaultConfig);
  info.speech_level_dbfs = initial_level_dbfs;
  helper.gain_applier->Process(info, fake_audio.float_frame_view());
  float maximal_difference = 0.0f;
  float current_value = 1.0f * DbToRatio(kDefaultConfig.initial_gain_db);
  for (const auto& x : fake_audio.float_frame_view().channel(0)) {
    const float difference = std::abs(x - current_value);
    maximal_difference = std::max(maximal_difference, difference);
    current_value = x;
  }

  const float max_change_per_frame_linear = DbToRatio(
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second));
  const float max_change_per_sample =
      max_change_per_frame_linear / kFrameLen10ms48kHz;

  EXPECT_LE(maximal_difference, max_change_per_sample);
}

TEST(GainController2AdaptiveGainApplier, NoiseLimitsGain) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  constexpr int num_initial_frames =
      kDefaultConfig.initial_gain_db /
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
  constexpr int num_frames = 50;

  ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
      << "kWithNoiseDbfs is too low";

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
    AdaptiveDigitalGainApplier::FrameInfo info =
        GetFrameInfoToNotAdapt(kDefaultConfig);
    info.speech_level_dbfs = initial_level_dbfs;
    info.noise_rms_dbfs = kWithNoiseDbfs;
    helper.gain_applier->Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
      const float maximal_ratio =
          *std::max_element(fake_audio.float_frame_view().channel(0).begin(),
                            fake_audio.float_frame_view().channel(0).end());

      EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
    }
  }
}

TEST(GainController2GainApplier, CanHandlePositiveSpeechLevels) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kStereo);

  // Make one call with positive audio level values and settings.
  VectorFloatFrame fake_audio(kStereo, kFrameLen10ms48kHz, 10000.0f);
  AdaptiveDigitalGainApplier::FrameInfo info =
      GetFrameInfoToNotAdapt(kDefaultConfig);
  info.speech_level_dbfs = 5.0f;
  helper.gain_applier->Process(info, fake_audio.float_frame_view());
}

TEST(GainController2GainApplier, AudioLevelLimitsGain) {
  GainApplierHelper helper(kDefaultConfig, /*sample_rate_hz=*/48000, kMono);

  constexpr float initial_level_dbfs = -25.0f;
  constexpr int num_initial_frames =
      kDefaultConfig.initial_gain_db /
      GetMaxGainChangePerFrameDb(kDefaultConfig.max_gain_change_db_per_second);
  constexpr int num_frames = 50;

  ASSERT_GT(kWithNoiseDbfs, kDefaultConfig.max_output_noise_level_dbfs)
      << "kWithNoiseDbfs is too low";

  for (int i = 0; i < num_initial_frames + num_frames; ++i) {
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms48kHz, 1.0f);
    AdaptiveDigitalGainApplier::FrameInfo info =
        GetFrameInfoToNotAdapt(kDefaultConfig);
    info.speech_level_dbfs = initial_level_dbfs;
    info.limiter_envelope_dbfs = 1.0f;
    info.speech_level_reliable = false;
    helper.gain_applier->Process(info, fake_audio.float_frame_view());

    // Wait so that the adaptive gain applier has time to lower the gain.
    if (i > num_initial_frames) {
      const float maximal_ratio =
          *std::max_element(fake_audio.float_frame_view().channel(0).begin(),
                            fake_audio.float_frame_view().channel(0).end());

      EXPECT_NEAR(maximal_ratio, 1.0f, 0.001f);
    }
  }
}

class AdaptiveDigitalGainApplierTest : public ::testing::TestWithParam<int> {
 protected:
  int adjacent_speech_frames_threshold() const { return GetParam(); }
};

TEST_P(AdaptiveDigitalGainApplierTest,
       DoNotIncreaseGainWithTooFewSpeechFrames) {
  AdaptiveDigitalConfig config;
  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
  GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);

  // Lower the speech level so that the target gain will be increased.
  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
  info.speech_level_dbfs -= 12.0f;

  float prev_gain = 0.0f;
  for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(info, audio.float_frame_view());
    const float gain = audio.float_frame_view().channel(0)[0];
    if (i > 0) {
      EXPECT_EQ(prev_gain, gain);  // No gain increase applied.
    }
    prev_gain = gain;
  }
}

TEST_P(AdaptiveDigitalGainApplierTest, IncreaseGainWithEnoughSpeechFrames) {
  AdaptiveDigitalConfig config;
  config.adjacent_speech_frames_threshold = adjacent_speech_frames_threshold();
  GainApplierHelper helper(config, /*sample_rate_hz=*/48000, kMono);

  // Lower the speech level so that the target gain will be increased.
  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
  info.speech_level_dbfs -= 12.0f;

  float prev_gain = 0.0f;
  for (int i = 0; i < config.adjacent_speech_frames_threshold; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
    helper.gain_applier->Process(info, audio.float_frame_view());
    prev_gain = audio.float_frame_view().channel(0)[0];
  }

  // Process one more speech frame.
  VectorFloatFrame audio(kMono, kFrameLen10ms48kHz, 1.0f);
  helper.gain_applier->Process(info, audio.float_frame_view());

  // An increased gain has been applied.
  EXPECT_GT(audio.float_frame_view().channel(0)[0], prev_gain);
}

INSTANTIATE_TEST_SUITE_P(GainController2,
                         AdaptiveDigitalGainApplierTest,
                         ::testing::Values(1, 7, 31));

// Checks that the input is never modified when running in dry run mode.
TEST(GainController2GainApplier, DryRunDoesNotChangeInput) {
  AdaptiveDigitalConfig config;
  config.dry_run = true;
  GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);

  // Simulate an input signal with log speech level.
  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
  info.speech_level_dbfs = -60.0f;
  const int num_frames_to_adapt =
      static_cast<int>(
          config.max_gain_db /
          GetMaxGainChangePerFrameDb(config.max_gain_change_db_per_second)) +
      kNumExtraFrames;
  constexpr float kPcmSamples = 123.456f;
  // Run the gain applier and check that the PCM samples are not modified.
  for (int i = 0; i < num_frames_to_adapt; ++i) {
    SCOPED_TRACE(i);
    VectorFloatFrame fake_audio(kMono, kFrameLen10ms8kHz, kPcmSamples);
    helper.gain_applier->Process(info, fake_audio.float_frame_view());
    EXPECT_FLOAT_EQ(fake_audio.float_frame_view().channel(0)[0], kPcmSamples);
  }
}

// Checks that no sample is modified before and after the sample rate changes.
TEST(GainController2GainApplier, DryRunHandlesSampleRateChange) {
  AdaptiveDigitalConfig config;
  config.dry_run = true;
  GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);

  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
  info.speech_level_dbfs = -60.0f;
  constexpr float kPcmSamples = 123.456f;
  VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
  helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
  EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
  helper.gain_applier->Initialize(/*sample_rate_hz=*/48000, kMono);
  VectorFloatFrame fake_audio_48k(kMono, kFrameLen10ms48kHz, kPcmSamples);
  helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
}

// Checks that no sample is modified before and after the number of channels
// changes.
TEST(GainController2GainApplier, DryRunHandlesNumChannelsChange) {
  AdaptiveDigitalConfig config;
  config.dry_run = true;
  GainApplierHelper helper(config, /*sample_rate_hz=*/8000, kMono);

  AdaptiveDigitalGainApplier::FrameInfo info = GetFrameInfoToNotAdapt(config);
  info.speech_level_dbfs = -60.0f;
  constexpr float kPcmSamples = 123.456f;
  VectorFloatFrame fake_audio_8k(kMono, kFrameLen10ms8kHz, kPcmSamples);
  helper.gain_applier->Process(info, fake_audio_8k.float_frame_view());
  EXPECT_FLOAT_EQ(fake_audio_8k.float_frame_view().channel(0)[0], kPcmSamples);
  VectorFloatFrame fake_audio_48k(kStereo, kFrameLen10ms8kHz, kPcmSamples);
  helper.gain_applier->Initialize(/*sample_rate_hz=*/8000, kStereo);
  helper.gain_applier->Process(info, fake_audio_48k.float_frame_view());
  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(0)[0], kPcmSamples);
  EXPECT_FLOAT_EQ(fake_audio_48k.float_frame_view().channel(1)[0], kPcmSamples);
}

}  // namespace
}  // namespace webrtc