xref: /aosp_15_r20/external/webrtc/modules/audio_processing/agc2/rnn_vad/rnn.cc (revision d9f758449e529ab9291ac668be2861e7a55c2422)
1 /*
2  *  Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3  *
4  *  Use of this source code is governed by a BSD-style license
5  *  that can be found in the LICENSE file in the root of the source
6  *  tree. An additional intellectual property rights grant can be found
7  *  in the file PATENTS.  All contributing project authors may
8  *  be found in the AUTHORS file in the root of the source tree.
9  */
10 
11 #include "modules/audio_processing/agc2/rnn_vad/rnn.h"
12 
13 #include "rtc_base/checks.h"
14 #include "third_party/rnnoise/src/rnn_vad_weights.h"
15 
16 namespace webrtc {
17 namespace rnn_vad {
18 namespace {
19 
20 using ::rnnoise::kInputLayerInputSize;
21 static_assert(kFeatureVectorSize == kInputLayerInputSize, "");
22 using ::rnnoise::kInputDenseBias;
23 using ::rnnoise::kInputDenseWeights;
24 using ::rnnoise::kInputLayerOutputSize;
25 static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
26 
27 using ::rnnoise::kHiddenGruBias;
28 using ::rnnoise::kHiddenGruRecurrentWeights;
29 using ::rnnoise::kHiddenGruWeights;
30 using ::rnnoise::kHiddenLayerOutputSize;
31 static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, "");
32 
33 using ::rnnoise::kOutputDenseBias;
34 using ::rnnoise::kOutputDenseWeights;
35 using ::rnnoise::kOutputLayerOutputSize;
36 static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
37 
38 }  // namespace
39 
RnnVad(const AvailableCpuFeatures & cpu_features)40 RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
41     : input_(kInputLayerInputSize,
42              kInputLayerOutputSize,
43              kInputDenseBias,
44              kInputDenseWeights,
45              ActivationFunction::kTansigApproximated,
46              cpu_features,
47              /*layer_name=*/"FC1"),
48       hidden_(kInputLayerOutputSize,
49               kHiddenLayerOutputSize,
50               kHiddenGruBias,
51               kHiddenGruWeights,
52               kHiddenGruRecurrentWeights,
53               cpu_features,
54               /*layer_name=*/"GRU1"),
55       output_(kHiddenLayerOutputSize,
56               kOutputLayerOutputSize,
57               kOutputDenseBias,
58               kOutputDenseWeights,
59               ActivationFunction::kSigmoidApproximated,
60               // The output layer is just 24x1. The unoptimized code is faster.
61               NoAvailableCpuFeatures(),
62               /*layer_name=*/"FC2") {
63   // Input-output chaining size checks.
64   RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
65       << "The input and the hidden layers sizes do not match.";
66   RTC_DCHECK_EQ(hidden_.size(), output_.input_size())
67       << "The hidden and the output layers sizes do not match.";
68 }
69 
70 RnnVad::~RnnVad() = default;
71 
Reset()72 void RnnVad::Reset() {
73   hidden_.Reset();
74 }
75 
ComputeVadProbability(rtc::ArrayView<const float,kFeatureVectorSize> feature_vector,bool is_silence)76 float RnnVad::ComputeVadProbability(
77     rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
78     bool is_silence) {
79   if (is_silence) {
80     Reset();
81     return 0.f;
82   }
83   input_.ComputeOutput(feature_vector);
84   hidden_.ComputeOutput(input_);
85   output_.ComputeOutput(hidden_);
86   RTC_DCHECK_EQ(output_.size(), 1);
87   return output_.data()[0];
88 }
89 
90 }  // namespace rnn_vad
91 }  // namespace webrtc
92