xref: /aosp_15_r20/external/armnn/samples/SpeechRecognition/test/MFCCTest.cpp (revision 89c4ff92f2867872bb9e2354d150bf0c8c502810)
1 //
2 // Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include <catch.hpp>
7 #include <limits>
8 
9 #include "Wav2LetterMFCC.hpp"
10 
// Audio fixture for the MFCC test below: 512 integer-valued samples
// (64 rows of 8). The test divides every sample by (1 << 15) before
// handing the data to the MFCC implementation.
const std::vector<float> testWav{
    -3.0f, 0.0f, 1.0f, -1.0f, 2.0f, 3.0f, -2.0f, 2.0f,
    1.0f, -2.0f, 0.0f, 3.0f, -1.0f, 8.0f, 3.0f, 2.0f,
    -1.0f, -1.0f, 2.0f, 7.0f, 3.0f, 5.0f, 6.0f, 6.0f,
    6.0f, 12.0f, 5.0f, 6.0f, 3.0f, 3.0f, 5.0f, 4.0f,
    4.0f, 6.0f, 7.0f, 7.0f, 7.0f, 3.0f, 7.0f, 2.0f,
    8.0f, 4.0f, 4.0f, 2.0f, -4.0f, -1.0f, -1.0f, -4.0f,
    2.0f, 1.0f, -1.0f, -4.0f, 0.0f, -7.0f, -6.0f, -2.0f,
    -5.0f, 1.0f, -5.0f, -1.0f, -7.0f, -3.0f, -3.0f, -7.0f,
    0.0f, -3.0f, 3.0f, -5.0f, 0.0f, 1.0f, -2.0f, -2.0f,
    -3.0f, -3.0f, -7.0f, -3.0f, -2.0f, -6.0f, -5.0f, -8.0f,
    -2.0f, -8.0f, 4.0f, -9.0f, -4.0f, -9.0f, -5.0f, -5.0f,
    -3.0f, -9.0f, -3.0f, -9.0f, -1.0f, -7.0f, -4.0f, 1.0f,
    -3.0f, 2.0f, -8.0f, -4.0f, -4.0f, -5.0f, 1.0f, -3.0f,
    -1.0f, 0.0f, -1.0f, -2.0f, -3.0f, -2.0f, -4.0f, -1.0f,
    1.0f, -1.0f, 3.0f, 0.0f, 3.0f, 2.0f, 0.0f, 0.0f,
    0.0f, -3.0f, 1.0f, 1.0f, 0.0f, 8.0f, 3.0f, 4.0f,
    1.0f, 5.0f, 6.0f, 4.0f, 7.0f, 3.0f, 3.0f, 0.0f,
    3.0f, 6.0f, 7.0f, 6.0f, 4.0f, 5.0f, 9.0f, 9.0f,
    5.0f, 5.0f, 8.0f, 1.0f, 6.0f, 9.0f, 6.0f, 6.0f,
    7.0f, 1.0f, 8.0f, 1.0f, 5.0f, 0.0f, 5.0f, 5.0f,
    0.0f, 3.0f, 2.0f, 7.0f, 2.0f, -3.0f, 3.0f, 0.0f,
    3.0f, 0.0f, 0.0f, 0.0f, 2.0f, 0.0f, -1.0f, -1.0f,
    -2.0f, -3.0f, -8.0f, 0.0f, 1.0f, 0.0f, -3.0f, -3.0f,
    -3.0f, -2.0f, -3.0f, -3.0f, -4.0f, -6.0f, -2.0f, -8.0f,
    -9.0f, -4.0f, -1.0f, -5.0f, -3.0f, -3.0f, -4.0f, -3.0f,
    -6.0f, 3.0f, 0.0f, -1.0f, -2.0f, -9.0f, -4.0f, -2.0f,
    2.0f, -1.0f, 3.0f, -5.0f, -5.0f, -2.0f, 0.0f, -2.0f,
    0.0f, -1.0f, -3.0f, 1.0f, -2.0f, 9.0f, 4.0f, 5.0f,
    2.0f, 2.0f, 1.0f, 0.0f, -6.0f, -2.0f, 0.0f, 0.0f,
    0.0f, -1.0f, 4.0f, -4.0f, 3.0f, -7.0f, -1.0f, 5.0f,
    -6.0f, -1.0f, -5.0f, 4.0f, 3.0f, 9.0f, -2.0f, 1.0f,
    3.0f, 0.0f, 0.0f, -2.0f, 1.0f, 2.0f, 1.0f, 1.0f,
    0.0f, 3.0f, 2.0f, -1.0f, 3.0f, -3.0f, 7.0f, 0.0f,
    0.0f, 3.0f, 2.0f, 2.0f, -2.0f, 3.0f, -2.0f, 2.0f,
    -3.0f, 4.0f, -1.0f, -1.0f, -5.0f, -1.0f, -3.0f, -2.0f,
    1.0f, -1.0f, 3.0f, 2.0f, 4.0f, 1.0f, 2.0f, -2.0f,
    0.0f, 2.0f, 7.0f, 0.0f, 8.0f, -3.0f, 6.0f, -3.0f,
    6.0f, 1.0f, 2.0f, -3.0f, -1.0f, -1.0f, -1.0f, 1.0f,
    -2.0f, 2.0f, 1.0f, 2.0f, 0.0f, -2.0f, 3.0f, -2.0f,
    3.0f, -2.0f, 1.0f, 0.0f, -3.0f, -1.0f, -2.0f, -4.0f,
    -6.0f, -5.0f, -8.0f, -1.0f, -4.0f, 0.0f, -3.0f, -1.0f,
    -1.0f, -1.0f, 0.0f, -2.0f, -3.0f, -7.0f, -1.0f, 0.0f,
    1.0f, 5.0f, 0.0f, 5.0f, 1.0f, 1.0f, -3.0f, 0.0f,
    -6.0f, 3.0f, -8.0f, 4.0f, -8.0f, 6.0f, -6.0f, 1.0f,
    -6.0f, -2.0f, -5.0f, -6.0f, 0.0f, -5.0f, 4.0f, -1.0f,
    4.0f, -2.0f, 1.0f, 2.0f, 1.0f, 0.0f, -2.0f, 0.0f,
    0.0f, 2.0f, -2.0f, 2.0f, -5.0f, 2.0f, 0.0f, -2.0f,
    1.0f, -2.0f, 0.0f, 5.0f, 1.0f, 0.0f, 1.0f, 5.0f,
    0.0f, 8.0f, 3.0f, 2.0f, 2.0f, 0.0f, 5.0f, -2.0f,
    3.0f, 1.0f, 0.0f, 1.0f, 0.0f, -2.0f, -1.0f, -3.0f,
    1.0f, -1.0f, 3.0f, 0.0f, 3.0f, 0.0f, -2.0f, -1.0f,
    -4.0f, -4.0f, -4.0f, -1.0f, -4.0f, -4.0f, -3.0f, -6.0f,
    -3.0f, -7.0f, -3.0f, -1.0f, -2.0f, 0.0f, -5.0f, -4.0f,
    -7.0f, -3.0f, -2.0f, -2.0f, 1.0f, 2.0f, 2.0f, 8.0f,
    5.0f, 4.0f, 2.0f, 4.0f, 3.0f, 5.0f, 0.0f, 3.0f,
    3.0f, 6.0f, 4.0f, 2.0f, 2.0f, -2.0f, 4.0f, -2.0f,
    3.0f, 3.0f, 2.0f, 1.0f, 1.0f, 4.0f, -5.0f, 2.0f,
    -3.0f, 0.0f, -1.0f, 1.0f, -2.0f, 2.0f, 5.0f, 1.0f,
    4.0f, 2.0f, 3.0f, 1.0f, -1.0f, 1.0f, 0.0f, 6.0f,
    0.0f, -2.0f, -1.0f, 1.0f, -1.0f, 2.0f, -5.0f, -1.0f,
    -5.0f, -1.0f, -6.0f, -3.0f, -3.0f, 2.0f, 4.0f, 0.0f,
    -1.0f, -5.0f, 3.0f, -4.0f, -1.0f, -3.0f, -4.0f, 1.0f,
    -4.0f, 1.0f, -1.0f, -1.0f, 0.0f, -5.0f, -4.0f, -2.0f,
    -1.0f, -1.0f, -3.0f, -7.0f, -3.0f, -3.0f, 4.0f, 4.0f
};
78 
79 TEST_CASE("Test MFCC")
80 {
81     int sampFreq = 16000;
82     int frameLenMs = 32;
83     int frameLenSamples = sampFreq * frameLenMs * 0.001;
84     int numMfccFeats = 13;
85 
86     std::vector<float> fullAudioData;
87 
88     for (auto f : testWav)
89     {
90         fullAudioData.emplace_back( f / (1<<15));
91     }
92 
93     MfccParams mfccParams(sampFreq, 128, 0, 8000, numMfccFeats,
94                           frameLenSamples, false, 1);
95 
96     Wav2LetterMFCC mfccInst = Wav2LetterMFCC(mfccParams);
97     mfccInst.Init();
98     auto mfccOutput = mfccInst.MfccCompute(fullAudioData);
99 
100     std::vector<float> expected = { -834.96564f, 21.02699f, 18.62856f, 7.3412f, 18.90791f, -5.36034f, 6.52351f,
101                                     -11.27064f, 8.37522f, 12.0672f, 8.30833f, -13.50008f, -18.1761f};
102 
103     REQUIRE_THAT(mfccOutput, Catch::Approx(expected).epsilon(1.e-5) );
104 }