xref: /aosp_15_r20/external/libopus/dnn/dump_data.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1*a58d3d2aSXin Li /* Copyright (c) 2017-2018 Mozilla */
2*a58d3d2aSXin Li /*
3*a58d3d2aSXin Li    Redistribution and use in source and binary forms, with or without
4*a58d3d2aSXin Li    modification, are permitted provided that the following conditions
5*a58d3d2aSXin Li    are met:
6*a58d3d2aSXin Li 
7*a58d3d2aSXin Li    - Redistributions of source code must retain the above copyright
8*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer.
9*a58d3d2aSXin Li 
10*a58d3d2aSXin Li    - Redistributions in binary form must reproduce the above copyright
11*a58d3d2aSXin Li    notice, this list of conditions and the following disclaimer in the
12*a58d3d2aSXin Li    documentation and/or other materials provided with the distribution.
13*a58d3d2aSXin Li 
14*a58d3d2aSXin Li    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15*a58d3d2aSXin Li    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16*a58d3d2aSXin Li    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17*a58d3d2aSXin Li    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
18*a58d3d2aSXin Li    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19*a58d3d2aSXin Li    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20*a58d3d2aSXin Li    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21*a58d3d2aSXin Li    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22*a58d3d2aSXin Li    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23*a58d3d2aSXin Li    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24*a58d3d2aSXin Li    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*a58d3d2aSXin Li */
26*a58d3d2aSXin Li 
27*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
28*a58d3d2aSXin Li #include "config.h"
29*a58d3d2aSXin Li #endif
30*a58d3d2aSXin Li 
31*a58d3d2aSXin Li #include <stdlib.h>
32*a58d3d2aSXin Li #include <string.h>
33*a58d3d2aSXin Li #include <stdio.h>
34*a58d3d2aSXin Li #include <unistd.h>
35*a58d3d2aSXin Li #include "kiss_fft.h"
36*a58d3d2aSXin Li #include "common.h"
37*a58d3d2aSXin Li #include <math.h>
38*a58d3d2aSXin Li #include "freq.h"
39*a58d3d2aSXin Li #include "pitch.h"
40*a58d3d2aSXin Li #include "arch.h"
41*a58d3d2aSXin Li #include <assert.h>
42*a58d3d2aSXin Li #include "lpcnet.h"
43*a58d3d2aSXin Li #include "lpcnet_private.h"
44*a58d3d2aSXin Li #include "os_support.h"
45*a58d3d2aSXin Li #include "cpu_support.h"
46*a58d3d2aSXin Li 
47*a58d3d2aSXin Li 
/* Run N samples of x through a second-order recursive filter and store the
   result in y. The leading numerator tap is implicitly 1; b[0..1] and a[0..1]
   are the remaining numerator and denominator taps. mem[0..1] holds the filter
   state and is updated in place so consecutive calls continue the same stream.
   Each input sample is read before the matching output is stored, so y and x
   may be the same buffer. */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n = 0;
  while (n < N) {
    const float in = x[n];
    const float out = in + mem[0];
    /* State update is evaluated in double and rounded back to float on store. */
    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
    mem[1] = (b[1]*(double)in - a[1]*(double)out);
    y[n] = out;
    n++;
  }
}
59*a58d3d2aSXin Li 
/* Return one rand() draw scaled by RAND_MAX and shifted down by one half,
   i.e. a value between -0.5 and 0.5. */
static float uni_rand(void) {
  const double unit = rand()/(double)RAND_MAX;
  return unit-.5;
}
63*a58d3d2aSXin Li 
/* Fill the two-element arrays a and b with random coefficients, each equal to
   .75 times a fresh uni_rand() draw. Both entries of a are drawn before those
   of b. */
static void rand_resp(float *a, float *b) {
  int k;
  for (k=0;k<2;k++) a[k] = .75*uni_rand();
  for (k=0;k<2;k++) b[k] = .75*uni_rand();
}
70*a58d3d2aSXin Li 
compute_noise(int * noise,float noise_std)71*a58d3d2aSXin Li void compute_noise(int *noise, float noise_std) {
72*a58d3d2aSXin Li   int i;
73*a58d3d2aSXin Li   for (i=0;i<FRAME_SIZE;i++) {
74*a58d3d2aSXin Li     noise[i] = (int)floor(.5 + noise_std*.707*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
75*a58d3d2aSXin Li   }
76*a58d3d2aSXin Li }
77*a58d3d2aSXin Li 
float2short(float x)78*a58d3d2aSXin Li static opus_int16 float2short(float x)
79*a58d3d2aSXin Li {
80*a58d3d2aSXin Li   int i;
81*a58d3d2aSXin Li   i = (int)floor(.5+x);
82*a58d3d2aSXin Li   return IMAX(-32767, IMIN(32767, i));
83*a58d3d2aSXin Li }
84*a58d3d2aSXin Li 
85*a58d3d2aSXin Li 
/* Write one frame of interleaved 16-bit sample pairs to file.

   For every sample a prediction is formed from the LPC_ORDER values in
   st->sig_mem weighted by st->features[NB_BANDS+2 ...]. The difference between
   pcm[i] and that prediction is passed through lin2ulaw(), perturbed by
   noise[i], limited to 0..255 and passed back through ulaw2lin() to produce
   the next st->sig_mem[0]. Each output pair holds the current st->sig_mem[0]
   (before the update) followed by pcm[i].

   st    encoder state; st->sig_mem is updated in place.
   pcm   FRAME_SIZE target samples.
   noise FRAME_SIZE integer offsets added to the u-law value.
   file  destination stream; the fwrite() result is not checked. */
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
  opus_int16 data[2*FRAME_SIZE];
  int n;
  for (n=0;n<FRAME_SIZE;n++) {
    float pred=0;
    float exc;
    int k=0;
    /* Prediction from the previously stored signal values. */
    while (k<LPC_ORDER) {
      pred -= st->features[NB_BANDS+2+k]*st->sig_mem[k];
      k++;
    }
    exc = lin2ulaw(pcm[n] - pred);
    /* Signal in. */
    data[2*n] = float2short(st->sig_mem[0]);
    /* Signal out. */
    data[2*n+1] = pcm[n];
    /* Simulate error on excitation. */
    exc = exc + noise[n];
    exc = IMIN(255, IMAX(0, exc));

    /* Shift the history by one slot, then store the newest value in front. */
    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
    st->sig_mem[0] = pred + ulaw2lin(exc);
  }
  /* 4*FRAME_SIZE bytes: presumably 2 values per sample at 2 bytes each. */
  fwrite(data, 4*FRAME_SIZE, 1, file);
}
108*a58d3d2aSXin Li 
main(int argc,char ** argv)109*a58d3d2aSXin Li int main(int argc, char **argv) {
110*a58d3d2aSXin Li   int i;
111*a58d3d2aSXin Li   char *argv0;
112*a58d3d2aSXin Li   int count=0;
113*a58d3d2aSXin Li   static const float a_hp[2] = {-1.99599, 0.99600};
114*a58d3d2aSXin Li   static const float b_hp[2] = {-2, 1};
115*a58d3d2aSXin Li   float a_sig[2] = {0};
116*a58d3d2aSXin Li   float b_sig[2] = {0};
117*a58d3d2aSXin Li   float mem_hp_x[2]={0};
118*a58d3d2aSXin Li   float mem_resp_x[2]={0};
119*a58d3d2aSXin Li   float mem_preemph=0;
120*a58d3d2aSXin Li   float x[FRAME_SIZE];
121*a58d3d2aSXin Li   int gain_change_count=0;
122*a58d3d2aSXin Li   FILE *f1;
123*a58d3d2aSXin Li   FILE *ffeat;
124*a58d3d2aSXin Li   FILE *fpcm=NULL;
125*a58d3d2aSXin Li   opus_int16 pcm[FRAME_SIZE]={0};
126*a58d3d2aSXin Li   int noisebuf[FRAME_SIZE]={0};
127*a58d3d2aSXin Li   opus_int16 tmp[FRAME_SIZE] = {0};
128*a58d3d2aSXin Li   float speech_gain=1;
129*a58d3d2aSXin Li   float old_speech_gain = 1;
130*a58d3d2aSXin Li   int one_pass_completed = 0;
131*a58d3d2aSXin Li   LPCNetEncState *st;
132*a58d3d2aSXin Li   float noise_std=0;
133*a58d3d2aSXin Li   int training = -1;
134*a58d3d2aSXin Li   int burg = 0;
135*a58d3d2aSXin Li   int pitch = 0;
136*a58d3d2aSXin Li   FILE *fnoise = NULL;
137*a58d3d2aSXin Li   float noise_gain = 0;
138*a58d3d2aSXin Li   long noise_size=0;
139*a58d3d2aSXin Li   int arch;
140*a58d3d2aSXin Li   srand(getpid());
141*a58d3d2aSXin Li   arch = opus_select_arch();
142*a58d3d2aSXin Li   st = lpcnet_encoder_create();
143*a58d3d2aSXin Li   argv0=argv[0];
144*a58d3d2aSXin Li   if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
145*a58d3d2aSXin Li       burg = 1;
146*a58d3d2aSXin Li       training = 1;
147*a58d3d2aSXin Li   }
148*a58d3d2aSXin Li   else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
149*a58d3d2aSXin Li       burg = 1;
150*a58d3d2aSXin Li       training = 0;
151*a58d3d2aSXin Li   }
152*a58d3d2aSXin Li   else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
153*a58d3d2aSXin Li       pitch = 1;
154*a58d3d2aSXin Li       training = 1;
155*a58d3d2aSXin Li       fnoise = fopen(argv[2], "rb");
156*a58d3d2aSXin Li       fseek(fnoise, 0, SEEK_END);
157*a58d3d2aSXin Li       noise_size = ftell(fnoise);
158*a58d3d2aSXin Li       fseek(fnoise, 0, SEEK_SET);
159*a58d3d2aSXin Li       argv++;
160*a58d3d2aSXin Li   }
161*a58d3d2aSXin Li   else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
162*a58d3d2aSXin Li       pitch = 1;
163*a58d3d2aSXin Li       training = 0;
164*a58d3d2aSXin Li   }
165*a58d3d2aSXin Li   else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
166*a58d3d2aSXin Li   else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
167*a58d3d2aSXin Li   if (training == -1) {
168*a58d3d2aSXin Li     fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
169*a58d3d2aSXin Li     fprintf(stderr, "  or   %s -test <speech> <features out>\n", argv0);
170*a58d3d2aSXin Li     return 1;
171*a58d3d2aSXin Li   }
172*a58d3d2aSXin Li   f1 = fopen(argv[2], "r");
173*a58d3d2aSXin Li   if (f1 == NULL) {
174*a58d3d2aSXin Li     fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
175*a58d3d2aSXin Li     exit(1);
176*a58d3d2aSXin Li   }
177*a58d3d2aSXin Li   ffeat = fopen(argv[3], "wb");
178*a58d3d2aSXin Li   if (ffeat == NULL) {
179*a58d3d2aSXin Li     fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
180*a58d3d2aSXin Li     exit(1);
181*a58d3d2aSXin Li   }
182*a58d3d2aSXin Li   if (training && !pitch) {
183*a58d3d2aSXin Li     fpcm = fopen(argv[4], "wb");
184*a58d3d2aSXin Li     if (fpcm == NULL) {
185*a58d3d2aSXin Li       fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
186*a58d3d2aSXin Li       exit(1);
187*a58d3d2aSXin Li     }
188*a58d3d2aSXin Li   }
189*a58d3d2aSXin Li   while (1) {
190*a58d3d2aSXin Li     size_t ret;
191*a58d3d2aSXin Li     ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
192*a58d3d2aSXin Li     if (feof(f1) || ret != FRAME_SIZE) {
193*a58d3d2aSXin Li       if (!training) break;
194*a58d3d2aSXin Li       rewind(f1);
195*a58d3d2aSXin Li       ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
196*a58d3d2aSXin Li       if (ret != FRAME_SIZE) {
197*a58d3d2aSXin Li         fprintf(stderr, "error reading\n");
198*a58d3d2aSXin Li         exit(1);
199*a58d3d2aSXin Li       }
200*a58d3d2aSXin Li       one_pass_completed = 1;
201*a58d3d2aSXin Li     }
202*a58d3d2aSXin Li     for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
203*a58d3d2aSXin Li     if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
204*a58d3d2aSXin Li     if (training && ++gain_change_count > 2821) {
205*a58d3d2aSXin Li       float tmp1, tmp2;
206*a58d3d2aSXin Li       speech_gain = pow(10., (-30+(rand()%40))/20.);
207*a58d3d2aSXin Li       if (rand()&1) speech_gain = -speech_gain;
208*a58d3d2aSXin Li       if (rand()%20==0) speech_gain *= .01;
209*a58d3d2aSXin Li       if (!pitch && rand()%100==0) speech_gain = 0;
210*a58d3d2aSXin Li       gain_change_count = 0;
211*a58d3d2aSXin Li       rand_resp(a_sig, b_sig);
212*a58d3d2aSXin Li       tmp1 = rand()/(float)RAND_MAX;
213*a58d3d2aSXin Li       tmp2 = rand()/(float)RAND_MAX;
214*a58d3d2aSXin Li       noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
215*a58d3d2aSXin Li       if (fnoise != NULL) {
216*a58d3d2aSXin Li         long pos;
217*a58d3d2aSXin Li         /* Randomize the fraction because rand() only gives us 31 bits. */
218*a58d3d2aSXin Li         float frac_pos = rand()/(float)RAND_MAX;
219*a58d3d2aSXin Li         pos = (long)(frac_pos*noise_size);
220*a58d3d2aSXin Li         /* 32-bit alignment. */
221*a58d3d2aSXin Li         pos = pos/4 * 4;
222*a58d3d2aSXin Li         if (pos > noise_size-500000) pos = noise_size-500000;
223*a58d3d2aSXin Li         noise_gain = pow(10., (-15+(rand()%40))/20.);
224*a58d3d2aSXin Li         if (rand()%10==0) noise_gain = 0;
225*a58d3d2aSXin Li         fseek(fnoise, pos, SEEK_SET);
226*a58d3d2aSXin Li       }
227*a58d3d2aSXin Li     }
228*a58d3d2aSXin Li     if (fnoise != NULL) {
229*a58d3d2aSXin Li       opus_int16 noise[FRAME_SIZE];
230*a58d3d2aSXin Li       ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
231*a58d3d2aSXin Li       for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
232*a58d3d2aSXin Li     }
233*a58d3d2aSXin Li     biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
234*a58d3d2aSXin Li     biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
235*a58d3d2aSXin Li     for (i=0;i<FRAME_SIZE;i++) {
236*a58d3d2aSXin Li       float g;
237*a58d3d2aSXin Li       float f = (float)i/FRAME_SIZE;
238*a58d3d2aSXin Li       g = f*speech_gain + (1-f)*old_speech_gain;
239*a58d3d2aSXin Li       x[i] *= g;
240*a58d3d2aSXin Li     }
241*a58d3d2aSXin Li     if (burg) {
242*a58d3d2aSXin Li       float ceps[2*NB_BANDS];
243*a58d3d2aSXin Li       burg_cepstral_analysis(ceps, x);
244*a58d3d2aSXin Li       fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
245*a58d3d2aSXin Li     }
246*a58d3d2aSXin Li     preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
247*a58d3d2aSXin Li     for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5f;
248*a58d3d2aSXin Li     /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
249*a58d3d2aSXin Li     for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
250*a58d3d2aSXin Li     compute_frame_features(st, x, arch);
251*a58d3d2aSXin Li 
252*a58d3d2aSXin Li     if (fpcm) {
253*a58d3d2aSXin Li         compute_noise(noisebuf, noise_std);
254*a58d3d2aSXin Li     }
255*a58d3d2aSXin Li 
256*a58d3d2aSXin Li     if (pitch) {
257*a58d3d2aSXin Li       signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
258*a58d3d2aSXin Li       for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
259*a58d3d2aSXin Li         pitch_features[i] = (int)floor(.5f + 127.f*st->xcorr_features[i]);
260*a58d3d2aSXin Li       }
261*a58d3d2aSXin Li       for (i=0;i<PITCH_IF_FEATURES;i++) {
262*a58d3d2aSXin Li         pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = (int)floor(.5f + 127.f*st->if_features[i]);
263*a58d3d2aSXin Li       }
264*a58d3d2aSXin Li       fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
265*a58d3d2aSXin Li     } else {
266*a58d3d2aSXin Li       fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
267*a58d3d2aSXin Li     }
268*a58d3d2aSXin Li     /*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
269*a58d3d2aSXin Li     if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
270*a58d3d2aSXin Li     /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
271*a58d3d2aSXin Li     for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
272*a58d3d2aSXin Li     old_speech_gain = speech_gain;
273*a58d3d2aSXin Li     count++;
274*a58d3d2aSXin Li   }
275*a58d3d2aSXin Li   fclose(f1);
276*a58d3d2aSXin Li   fclose(ffeat);
277*a58d3d2aSXin Li   if (fpcm) fclose(fpcm);
278*a58d3d2aSXin Li   lpcnet_encoder_destroy(st);
279*a58d3d2aSXin Li   return 0;
280*a58d3d2aSXin Li }
281