/* Copyright (c) 2017-2018 Mozilla */
/*
   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions
   are met:

   - Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.

   - Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in the
   documentation and/or other materials provided with the distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
26*a58d3d2aSXin Li
27*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
28*a58d3d2aSXin Li #include "config.h"
29*a58d3d2aSXin Li #endif
30*a58d3d2aSXin Li
31*a58d3d2aSXin Li #include <stdlib.h>
32*a58d3d2aSXin Li #include <string.h>
33*a58d3d2aSXin Li #include <stdio.h>
34*a58d3d2aSXin Li #include <unistd.h>
35*a58d3d2aSXin Li #include "kiss_fft.h"
36*a58d3d2aSXin Li #include "common.h"
37*a58d3d2aSXin Li #include <math.h>
38*a58d3d2aSXin Li #include "freq.h"
39*a58d3d2aSXin Li #include "pitch.h"
40*a58d3d2aSXin Li #include "arch.h"
41*a58d3d2aSXin Li #include <assert.h>
42*a58d3d2aSXin Li #include "lpcnet.h"
43*a58d3d2aSXin Li #include "lpcnet_private.h"
44*a58d3d2aSXin Li #include "os_support.h"
45*a58d3d2aSXin Li #include "cpu_support.h"
46*a58d3d2aSXin Li
47*a58d3d2aSXin Li
/* Second-order recursive filter with two state variables.
 *
 *   y    output buffer, N samples (may be the same buffer as x)
 *   mem  the two filter state values; read and updated in place so that
 *        consecutive calls continue the same filter
 *   x    input buffer, N samples
 *   b    two feed-forward coefficients applied to the input sample
 *   a    two feedback coefficients applied to the output sample
 *   N    number of samples to process
 *
 * Each output is the input plus mem[0]; the state is then advanced from the
 * input and output of that sample. The state update is evaluated in double
 * precision before being stored back as float.
 */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n = 0;
  while (n < N) {
    const float in = x[n];
    const float out = in + mem[0];
    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
    mem[1] = (b[1]*(double)in - a[1]*(double)out);
    /* Written last so the call also works when y and x alias. */
    y[n] = out;
    n++;
  }
}
59*a58d3d2aSXin Li
/* Draws one value from rand() and maps it linearly onto [-0.5, 0.5]. */
static float uni_rand(void) {
  const double unit = rand()/(double)RAND_MAX;
  return (float)(unit-.5);
}
63*a58d3d2aSXin Li
/* Fills the two-element coefficient arrays a and b with independent random
 * values, each equal to 0.75 times a uni_rand() draw. The draws are consumed
 * in the order a[0], a[1], b[0], b[1]. */
static void rand_resp(float *a, float *b) {
  int k;
  for (k = 0; k < 2; k++) a[k] = .75*uni_rand();
  for (k = 0; k < 2; k++) b[k] = .75*uni_rand();
}
70*a58d3d2aSXin Li
compute_noise(int * noise,float noise_std)71*a58d3d2aSXin Li void compute_noise(int *noise, float noise_std) {
72*a58d3d2aSXin Li int i;
73*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) {
74*a58d3d2aSXin Li noise[i] = (int)floor(.5 + noise_std*.707*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
75*a58d3d2aSXin Li }
76*a58d3d2aSXin Li }
77*a58d3d2aSXin Li
float2short(float x)78*a58d3d2aSXin Li static opus_int16 float2short(float x)
79*a58d3d2aSXin Li {
80*a58d3d2aSXin Li int i;
81*a58d3d2aSXin Li i = (int)floor(.5+x);
82*a58d3d2aSXin Li return IMAX(-32767, IMIN(32767, i));
83*a58d3d2aSXin Li }
84*a58d3d2aSXin Li
85*a58d3d2aSXin Li
/* Writes one frame of interleaved 16-bit training samples to file.
 *
 * For each of the FRAME_SIZE samples a prediction p is formed from
 * st->sig_mem and the LPC_ORDER values st->features[NB_BANDS+2 ...]. The
 * residual pcm[i]-p is passed through lin2ulaw(), perturbed by noise[i],
 * clamped to [0, 255] and converted back with ulaw2lin() to update
 * st->sig_mem. The output pair for sample i is (st->sig_mem[0] before the
 * update, pcm[i]).
 *
 *   st     encoder state; features is read, sig_mem is read and updated
 *   pcm    FRAME_SIZE input samples
 *   noise  FRAME_SIZE integer offsets added in the u-law domain
 *   file   destination stream, 2*FRAME_SIZE opus_int16 values per call
 *
 * A failed write is reported on stderr and terminates the program, in line
 * with the other I/O failures in this tool, rather than silently leaving a
 * truncated output file.
 */
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
  int i;
  opus_int16 data[2*FRAME_SIZE];
  for (i=0;i<FRAME_SIZE;i++) {
    float p=0;
    float e;
    int j;
    for (j=0;j<LPC_ORDER;j++) p -= st->features[NB_BANDS+2+j]*st->sig_mem[j];
    e = lin2ulaw(pcm[i] - p);
    /* Signal in. */
    data[2*i] = float2short(st->sig_mem[0]);
    /* Signal out. */
    data[2*i+1] = pcm[i];
    /* Simulate error on excitation. */
    e += noise[i];
    e = IMIN(255, IMAX(0, e));

    /* Shift the signal history by one and insert the newest sample. */
    OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
    st->sig_mem[0] = p + ulaw2lin(e);
  }
  if (fwrite(data, sizeof(data), 1, file) != 1) {
    fprintf(stderr, "Error writing PCM output\n");
    exit(1);
  }
}
108*a58d3d2aSXin Li
main(int argc,char ** argv)109*a58d3d2aSXin Li int main(int argc, char **argv) {
110*a58d3d2aSXin Li int i;
111*a58d3d2aSXin Li char *argv0;
112*a58d3d2aSXin Li int count=0;
113*a58d3d2aSXin Li static const float a_hp[2] = {-1.99599, 0.99600};
114*a58d3d2aSXin Li static const float b_hp[2] = {-2, 1};
115*a58d3d2aSXin Li float a_sig[2] = {0};
116*a58d3d2aSXin Li float b_sig[2] = {0};
117*a58d3d2aSXin Li float mem_hp_x[2]={0};
118*a58d3d2aSXin Li float mem_resp_x[2]={0};
119*a58d3d2aSXin Li float mem_preemph=0;
120*a58d3d2aSXin Li float x[FRAME_SIZE];
121*a58d3d2aSXin Li int gain_change_count=0;
122*a58d3d2aSXin Li FILE *f1;
123*a58d3d2aSXin Li FILE *ffeat;
124*a58d3d2aSXin Li FILE *fpcm=NULL;
125*a58d3d2aSXin Li opus_int16 pcm[FRAME_SIZE]={0};
126*a58d3d2aSXin Li int noisebuf[FRAME_SIZE]={0};
127*a58d3d2aSXin Li opus_int16 tmp[FRAME_SIZE] = {0};
128*a58d3d2aSXin Li float speech_gain=1;
129*a58d3d2aSXin Li float old_speech_gain = 1;
130*a58d3d2aSXin Li int one_pass_completed = 0;
131*a58d3d2aSXin Li LPCNetEncState *st;
132*a58d3d2aSXin Li float noise_std=0;
133*a58d3d2aSXin Li int training = -1;
134*a58d3d2aSXin Li int burg = 0;
135*a58d3d2aSXin Li int pitch = 0;
136*a58d3d2aSXin Li FILE *fnoise = NULL;
137*a58d3d2aSXin Li float noise_gain = 0;
138*a58d3d2aSXin Li long noise_size=0;
139*a58d3d2aSXin Li int arch;
140*a58d3d2aSXin Li srand(getpid());
141*a58d3d2aSXin Li arch = opus_select_arch();
142*a58d3d2aSXin Li st = lpcnet_encoder_create();
143*a58d3d2aSXin Li argv0=argv[0];
144*a58d3d2aSXin Li if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
145*a58d3d2aSXin Li burg = 1;
146*a58d3d2aSXin Li training = 1;
147*a58d3d2aSXin Li }
148*a58d3d2aSXin Li else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
149*a58d3d2aSXin Li burg = 1;
150*a58d3d2aSXin Li training = 0;
151*a58d3d2aSXin Li }
152*a58d3d2aSXin Li else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
153*a58d3d2aSXin Li pitch = 1;
154*a58d3d2aSXin Li training = 1;
155*a58d3d2aSXin Li fnoise = fopen(argv[2], "rb");
156*a58d3d2aSXin Li fseek(fnoise, 0, SEEK_END);
157*a58d3d2aSXin Li noise_size = ftell(fnoise);
158*a58d3d2aSXin Li fseek(fnoise, 0, SEEK_SET);
159*a58d3d2aSXin Li argv++;
160*a58d3d2aSXin Li }
161*a58d3d2aSXin Li else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
162*a58d3d2aSXin Li pitch = 1;
163*a58d3d2aSXin Li training = 0;
164*a58d3d2aSXin Li }
165*a58d3d2aSXin Li else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
166*a58d3d2aSXin Li else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
167*a58d3d2aSXin Li if (training == -1) {
168*a58d3d2aSXin Li fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
169*a58d3d2aSXin Li fprintf(stderr, " or %s -test <speech> <features out>\n", argv0);
170*a58d3d2aSXin Li return 1;
171*a58d3d2aSXin Li }
172*a58d3d2aSXin Li f1 = fopen(argv[2], "r");
173*a58d3d2aSXin Li if (f1 == NULL) {
174*a58d3d2aSXin Li fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
175*a58d3d2aSXin Li exit(1);
176*a58d3d2aSXin Li }
177*a58d3d2aSXin Li ffeat = fopen(argv[3], "wb");
178*a58d3d2aSXin Li if (ffeat == NULL) {
179*a58d3d2aSXin Li fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
180*a58d3d2aSXin Li exit(1);
181*a58d3d2aSXin Li }
182*a58d3d2aSXin Li if (training && !pitch) {
183*a58d3d2aSXin Li fpcm = fopen(argv[4], "wb");
184*a58d3d2aSXin Li if (fpcm == NULL) {
185*a58d3d2aSXin Li fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
186*a58d3d2aSXin Li exit(1);
187*a58d3d2aSXin Li }
188*a58d3d2aSXin Li }
189*a58d3d2aSXin Li while (1) {
190*a58d3d2aSXin Li size_t ret;
191*a58d3d2aSXin Li ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
192*a58d3d2aSXin Li if (feof(f1) || ret != FRAME_SIZE) {
193*a58d3d2aSXin Li if (!training) break;
194*a58d3d2aSXin Li rewind(f1);
195*a58d3d2aSXin Li ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
196*a58d3d2aSXin Li if (ret != FRAME_SIZE) {
197*a58d3d2aSXin Li fprintf(stderr, "error reading\n");
198*a58d3d2aSXin Li exit(1);
199*a58d3d2aSXin Li }
200*a58d3d2aSXin Li one_pass_completed = 1;
201*a58d3d2aSXin Li }
202*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
203*a58d3d2aSXin Li if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
204*a58d3d2aSXin Li if (training && ++gain_change_count > 2821) {
205*a58d3d2aSXin Li float tmp1, tmp2;
206*a58d3d2aSXin Li speech_gain = pow(10., (-30+(rand()%40))/20.);
207*a58d3d2aSXin Li if (rand()&1) speech_gain = -speech_gain;
208*a58d3d2aSXin Li if (rand()%20==0) speech_gain *= .01;
209*a58d3d2aSXin Li if (!pitch && rand()%100==0) speech_gain = 0;
210*a58d3d2aSXin Li gain_change_count = 0;
211*a58d3d2aSXin Li rand_resp(a_sig, b_sig);
212*a58d3d2aSXin Li tmp1 = rand()/(float)RAND_MAX;
213*a58d3d2aSXin Li tmp2 = rand()/(float)RAND_MAX;
214*a58d3d2aSXin Li noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
215*a58d3d2aSXin Li if (fnoise != NULL) {
216*a58d3d2aSXin Li long pos;
217*a58d3d2aSXin Li /* Randomize the fraction because rand() only gives us 31 bits. */
218*a58d3d2aSXin Li float frac_pos = rand()/(float)RAND_MAX;
219*a58d3d2aSXin Li pos = (long)(frac_pos*noise_size);
220*a58d3d2aSXin Li /* 32-bit alignment. */
221*a58d3d2aSXin Li pos = pos/4 * 4;
222*a58d3d2aSXin Li if (pos > noise_size-500000) pos = noise_size-500000;
223*a58d3d2aSXin Li noise_gain = pow(10., (-15+(rand()%40))/20.);
224*a58d3d2aSXin Li if (rand()%10==0) noise_gain = 0;
225*a58d3d2aSXin Li fseek(fnoise, pos, SEEK_SET);
226*a58d3d2aSXin Li }
227*a58d3d2aSXin Li }
228*a58d3d2aSXin Li if (fnoise != NULL) {
229*a58d3d2aSXin Li opus_int16 noise[FRAME_SIZE];
230*a58d3d2aSXin Li ret = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
231*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
232*a58d3d2aSXin Li }
233*a58d3d2aSXin Li biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
234*a58d3d2aSXin Li biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
235*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) {
236*a58d3d2aSXin Li float g;
237*a58d3d2aSXin Li float f = (float)i/FRAME_SIZE;
238*a58d3d2aSXin Li g = f*speech_gain + (1-f)*old_speech_gain;
239*a58d3d2aSXin Li x[i] *= g;
240*a58d3d2aSXin Li }
241*a58d3d2aSXin Li if (burg) {
242*a58d3d2aSXin Li float ceps[2*NB_BANDS];
243*a58d3d2aSXin Li burg_cepstral_analysis(ceps, x);
244*a58d3d2aSXin Li fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
245*a58d3d2aSXin Li }
246*a58d3d2aSXin Li preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
247*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5f;
248*a58d3d2aSXin Li /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
249*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
250*a58d3d2aSXin Li compute_frame_features(st, x, arch);
251*a58d3d2aSXin Li
252*a58d3d2aSXin Li if (fpcm) {
253*a58d3d2aSXin Li compute_noise(noisebuf, noise_std);
254*a58d3d2aSXin Li }
255*a58d3d2aSXin Li
256*a58d3d2aSXin Li if (pitch) {
257*a58d3d2aSXin Li signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
258*a58d3d2aSXin Li for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
259*a58d3d2aSXin Li pitch_features[i] = (int)floor(.5f + 127.f*st->xcorr_features[i]);
260*a58d3d2aSXin Li }
261*a58d3d2aSXin Li for (i=0;i<PITCH_IF_FEATURES;i++) {
262*a58d3d2aSXin Li pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = (int)floor(.5f + 127.f*st->if_features[i]);
263*a58d3d2aSXin Li }
264*a58d3d2aSXin Li fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
265*a58d3d2aSXin Li } else {
266*a58d3d2aSXin Li fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
267*a58d3d2aSXin Li }
268*a58d3d2aSXin Li /*if(pitch) fwrite(pcm, FRAME_SIZE, 2, stdout);*/
269*a58d3d2aSXin Li if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
270*a58d3d2aSXin Li /*if (fpcm) fwrite(pcm, sizeof(opus_int16), FRAME_SIZE, fpcm);*/
271*a58d3d2aSXin Li for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
272*a58d3d2aSXin Li old_speech_gain = speech_gain;
273*a58d3d2aSXin Li count++;
274*a58d3d2aSXin Li }
275*a58d3d2aSXin Li fclose(f1);
276*a58d3d2aSXin Li fclose(ffeat);
277*a58d3d2aSXin Li if (fpcm) fclose(fpcm);
278*a58d3d2aSXin Li lpcnet_encoder_destroy(st);
279*a58d3d2aSXin Li return 0;
280*a58d3d2aSXin Li }
281