1 /*
2 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11 #include "modules/audio_processing/agc2/rnn_vad/rnn.h"
12
13 #include "rtc_base/checks.h"
14 #include "third_party/rnnoise/src/rnn_vad_weights.h"
15
16 namespace webrtc {
17 namespace rnn_vad {
18 namespace {
19
20 using ::rnnoise::kInputLayerInputSize;
21 static_assert(kFeatureVectorSize == kInputLayerInputSize, "");
22 using ::rnnoise::kInputDenseBias;
23 using ::rnnoise::kInputDenseWeights;
24 using ::rnnoise::kInputLayerOutputSize;
25 static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
26
27 using ::rnnoise::kHiddenGruBias;
28 using ::rnnoise::kHiddenGruRecurrentWeights;
29 using ::rnnoise::kHiddenGruWeights;
30 using ::rnnoise::kHiddenLayerOutputSize;
31 static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, "");
32
33 using ::rnnoise::kOutputDenseBias;
34 using ::rnnoise::kOutputDenseWeights;
35 using ::rnnoise::kOutputLayerOutputSize;
36 static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
37
38 } // namespace
39
RnnVad(const AvailableCpuFeatures & cpu_features)40 RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
41 : input_(kInputLayerInputSize,
42 kInputLayerOutputSize,
43 kInputDenseBias,
44 kInputDenseWeights,
45 ActivationFunction::kTansigApproximated,
46 cpu_features,
47 /*layer_name=*/"FC1"),
48 hidden_(kInputLayerOutputSize,
49 kHiddenLayerOutputSize,
50 kHiddenGruBias,
51 kHiddenGruWeights,
52 kHiddenGruRecurrentWeights,
53 cpu_features,
54 /*layer_name=*/"GRU1"),
55 output_(kHiddenLayerOutputSize,
56 kOutputLayerOutputSize,
57 kOutputDenseBias,
58 kOutputDenseWeights,
59 ActivationFunction::kSigmoidApproximated,
60 // The output layer is just 24x1. The unoptimized code is faster.
61 NoAvailableCpuFeatures(),
62 /*layer_name=*/"FC2") {
63 // Input-output chaining size checks.
64 RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
65 << "The input and the hidden layers sizes do not match.";
66 RTC_DCHECK_EQ(hidden_.size(), output_.input_size())
67 << "The hidden and the output layers sizes do not match.";
68 }
69
70 RnnVad::~RnnVad() = default;
71
Reset()72 void RnnVad::Reset() {
73 hidden_.Reset();
74 }
75
ComputeVadProbability(rtc::ArrayView<const float,kFeatureVectorSize> feature_vector,bool is_silence)76 float RnnVad::ComputeVadProbability(
77 rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
78 bool is_silence) {
79 if (is_silence) {
80 Reset();
81 return 0.f;
82 }
83 input_.ComputeOutput(feature_vector);
84 hidden_.ComputeOutput(input_);
85 output_.ComputeOutput(hidden_);
86 RTC_DCHECK_EQ(output_.size(), 1);
87 return output_.data()[0];
88 }
89
90 } // namespace rnn_vad
91 } // namespace webrtc
92