1*a58d3d2aSXin Li /* Copyright (c) 2017-2019 Mozilla */
2*a58d3d2aSXin Li /*
3*a58d3d2aSXin Li Redistribution and use in source and binary forms, with or without
4*a58d3d2aSXin Li modification, are permitted provided that the following conditions
5*a58d3d2aSXin Li are met:
6*a58d3d2aSXin Li
7*a58d3d2aSXin Li - Redistributions of source code must retain the above copyright
8*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer.
9*a58d3d2aSXin Li
10*a58d3d2aSXin Li - Redistributions in binary form must reproduce the above copyright
11*a58d3d2aSXin Li notice, this list of conditions and the following disclaimer in the
12*a58d3d2aSXin Li documentation and/or other materials provided with the distribution.
13*a58d3d2aSXin Li
14*a58d3d2aSXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15*a58d3d2aSXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16*a58d3d2aSXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17*a58d3d2aSXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
18*a58d3d2aSXin Li CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19*a58d3d2aSXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20*a58d3d2aSXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21*a58d3d2aSXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22*a58d3d2aSXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23*a58d3d2aSXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24*a58d3d2aSXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25*a58d3d2aSXin Li */
26*a58d3d2aSXin Li
27*a58d3d2aSXin Li #ifdef HAVE_CONFIG_H
28*a58d3d2aSXin Li #include "config.h"
29*a58d3d2aSXin Li #endif
30*a58d3d2aSXin Li
31*a58d3d2aSXin Li #include <stdlib.h>
32*a58d3d2aSXin Li #include <string.h>
33*a58d3d2aSXin Li #include <stdio.h>
34*a58d3d2aSXin Li #include "kiss_fft.h"
35*a58d3d2aSXin Li #include "common.h"
36*a58d3d2aSXin Li #include <math.h>
37*a58d3d2aSXin Li #include "freq.h"
38*a58d3d2aSXin Li #include "pitch.h"
39*a58d3d2aSXin Li #include "arch.h"
40*a58d3d2aSXin Li #include <assert.h>
41*a58d3d2aSXin Li #include "lpcnet_private.h"
42*a58d3d2aSXin Li #include "lpcnet.h"
43*a58d3d2aSXin Li #include "os_support.h"
44*a58d3d2aSXin Li #include "_kiss_fft_guts.h"
45*a58d3d2aSXin Li #include "celt_lpc.h"
46*a58d3d2aSXin Li #include "mathops.h"
47*a58d3d2aSXin Li
48*a58d3d2aSXin Li
lpcnet_encoder_get_size(void)49*a58d3d2aSXin Li int lpcnet_encoder_get_size(void) {
50*a58d3d2aSXin Li return sizeof(LPCNetEncState);
51*a58d3d2aSXin Li }
52*a58d3d2aSXin Li
lpcnet_encoder_init(LPCNetEncState * st)53*a58d3d2aSXin Li int lpcnet_encoder_init(LPCNetEncState *st) {
54*a58d3d2aSXin Li memset(st, 0, sizeof(*st));
55*a58d3d2aSXin Li pitchdnn_init(&st->pitchdnn);
56*a58d3d2aSXin Li return 0;
57*a58d3d2aSXin Li }
58*a58d3d2aSXin Li
lpcnet_encoder_load_model(LPCNetEncState * st,const void * data,int len)59*a58d3d2aSXin Li int lpcnet_encoder_load_model(LPCNetEncState *st, const void *data, int len) {
60*a58d3d2aSXin Li return pitchdnn_load_model(&st->pitchdnn, data, len);
61*a58d3d2aSXin Li }
62*a58d3d2aSXin Li
lpcnet_encoder_create(void)63*a58d3d2aSXin Li LPCNetEncState *lpcnet_encoder_create(void) {
64*a58d3d2aSXin Li LPCNetEncState *st;
65*a58d3d2aSXin Li st = opus_alloc(lpcnet_encoder_get_size());
66*a58d3d2aSXin Li lpcnet_encoder_init(st);
67*a58d3d2aSXin Li return st;
68*a58d3d2aSXin Li }
69*a58d3d2aSXin Li
lpcnet_encoder_destroy(LPCNetEncState * st)70*a58d3d2aSXin Li void lpcnet_encoder_destroy(LPCNetEncState *st) {
71*a58d3d2aSXin Li opus_free(st);
72*a58d3d2aSXin Li }
73*a58d3d2aSXin Li
frame_analysis(LPCNetEncState * st,kiss_fft_cpx * X,float * Ex,const float * in)74*a58d3d2aSXin Li static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in) {
75*a58d3d2aSXin Li float x[WINDOW_SIZE];
76*a58d3d2aSXin Li OPUS_COPY(x, st->analysis_mem, OVERLAP_SIZE);
77*a58d3d2aSXin Li OPUS_COPY(&x[OVERLAP_SIZE], in, FRAME_SIZE);
78*a58d3d2aSXin Li OPUS_COPY(st->analysis_mem, &in[FRAME_SIZE-OVERLAP_SIZE], OVERLAP_SIZE);
79*a58d3d2aSXin Li apply_window(x);
80*a58d3d2aSXin Li forward_transform(X, x);
81*a58d3d2aSXin Li lpcn_compute_band_energy(Ex, X);
82*a58d3d2aSXin Li }
83*a58d3d2aSXin Li
biquad(float * y,float mem[2],const float * x,const float * b,const float * a,int N)84*a58d3d2aSXin Li static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
85*a58d3d2aSXin Li int i;
86*a58d3d2aSXin Li float mem0, mem1;
87*a58d3d2aSXin Li mem0 = mem[0];
88*a58d3d2aSXin Li mem1 = mem[1];
89*a58d3d2aSXin Li for (i=0;i<N;i++) {
90*a58d3d2aSXin Li float xi, yi, mem00;
91*a58d3d2aSXin Li xi = x[i];
92*a58d3d2aSXin Li yi = x[i] + mem0;
93*a58d3d2aSXin Li mem00 = mem0;
94*a58d3d2aSXin Li /* Original code:
95*a58d3d2aSXin Li mem0 = mem1 + (b[0]*xi - a[0]*yi);
96*a58d3d2aSXin Li mem1 = (b[1]*xi - a[1]*yi);
97*a58d3d2aSXin Li Modified to reduce dependency chains: (the +1e-30f forces the ordering and has no effect on the output)
98*a58d3d2aSXin Li */
99*a58d3d2aSXin Li mem0 = (b[0]-a[0])*xi + mem1 - a[0]*mem0;
100*a58d3d2aSXin Li mem1 = (b[1]-a[1])*xi + 1e-30f - a[1]*mem00;
101*a58d3d2aSXin Li y[i] = yi;
102*a58d3d2aSXin Li }
103*a58d3d2aSXin Li mem[0] = mem0;
104*a58d3d2aSXin Li mem[1] = mem1;
105*a58d3d2aSXin Li }
106*a58d3d2aSXin Li
107*a58d3d2aSXin Li #define celt_log10(x) (0.3010299957f*celt_log2(x))
108*a58d3d2aSXin Li
compute_frame_features(LPCNetEncState * st,const float * in,int arch)109*a58d3d2aSXin Li void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
110*a58d3d2aSXin Li float aligned_in[FRAME_SIZE];
111*a58d3d2aSXin Li int i;
112*a58d3d2aSXin Li float Ly[NB_BANDS];
113*a58d3d2aSXin Li float follow, logMax;
114*a58d3d2aSXin Li kiss_fft_cpx X[FREQ_SIZE];
115*a58d3d2aSXin Li float Ex[NB_BANDS];
116*a58d3d2aSXin Li float xcorr[PITCH_MAX_PERIOD];
117*a58d3d2aSXin Li float ener0;
118*a58d3d2aSXin Li float ener;
119*a58d3d2aSXin Li float x[FRAME_SIZE+LPC_ORDER];
120*a58d3d2aSXin Li float frame_corr;
121*a58d3d2aSXin Li float xy, xx, yy;
122*a58d3d2aSXin Li int pitch;
123*a58d3d2aSXin Li float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
124*a58d3d2aSXin Li /* [b,a]=ellip(2, 2, 20, 1200/8000); */
125*a58d3d2aSXin Li static const float lp_b[2] = {-0.84946f, 1.f};
126*a58d3d2aSXin Li static const float lp_a[2] = {-1.54220f, 0.70781f};
127*a58d3d2aSXin Li OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
128*a58d3d2aSXin Li frame_analysis(st, X, Ex, in);
129*a58d3d2aSXin Li st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
130*a58d3d2aSXin Li for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
131*a58d3d2aSXin Li kiss_fft_cpx prod;
132*a58d3d2aSXin Li float norm_1;
133*a58d3d2aSXin Li C_MULC(prod, X[i], st->prev_if[i]);
134*a58d3d2aSXin Li norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
135*a58d3d2aSXin Li C_MULBYSCALAR(prod, norm_1);
136*a58d3d2aSXin Li st->if_features[3*i-2] = prod.r;
137*a58d3d2aSXin Li st->if_features[3*i-1] = prod.i;
138*a58d3d2aSXin Li st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
139*a58d3d2aSXin Li }
140*a58d3d2aSXin Li OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
141*a58d3d2aSXin Li /*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
142*a58d3d2aSXin Li logMax = -2;
143*a58d3d2aSXin Li follow = -2;
144*a58d3d2aSXin Li for (i=0;i<NB_BANDS;i++) {
145*a58d3d2aSXin Li Ly[i] = celt_log10(1e-2f+Ex[i]);
146*a58d3d2aSXin Li Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
147*a58d3d2aSXin Li logMax = MAX16(logMax, Ly[i]);
148*a58d3d2aSXin Li follow = MAX16(follow-2.5f, Ly[i]);
149*a58d3d2aSXin Li }
150*a58d3d2aSXin Li dct(st->features, Ly);
151*a58d3d2aSXin Li st->features[0] -= 4;
152*a58d3d2aSXin Li lpc_from_cepstrum(st->lpc, st->features);
153*a58d3d2aSXin Li for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
154*a58d3d2aSXin Li OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
155*a58d3d2aSXin Li OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
156*a58d3d2aSXin Li OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
157*a58d3d2aSXin Li OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
158*a58d3d2aSXin Li OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
159*a58d3d2aSXin Li OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
160*a58d3d2aSXin Li celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
161*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) {
162*a58d3d2aSXin Li st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
163*a58d3d2aSXin Li st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
164*a58d3d2aSXin Li /*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
165*a58d3d2aSXin Li }
166*a58d3d2aSXin Li biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
167*a58d3d2aSXin Li {
168*a58d3d2aSXin Li double ener1;
169*a58d3d2aSXin Li float *buf = st->exc_buf;
170*a58d3d2aSXin Li celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
171*a58d3d2aSXin Li ener0 = celt_inner_prod(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
172*a58d3d2aSXin Li ener1 = celt_inner_prod(&buf[0], &buf[0], FRAME_SIZE, arch);
173*a58d3d2aSXin Li /*printf("%f\n", st->frame_weight[sub]);*/
174*a58d3d2aSXin Li for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
175*a58d3d2aSXin Li ener = 1 + ener0 + ener1;
176*a58d3d2aSXin Li st->xcorr_features[i] = 2*xcorr[i];
177*a58d3d2aSXin Li ener_norm[i] = ener;
178*a58d3d2aSXin Li ener1 += buf[i+FRAME_SIZE]*(double)buf[i+FRAME_SIZE] - buf[i]*(double)buf[i];
179*a58d3d2aSXin Li /*printf("%f ", st->xcorr_features[i]);*/
180*a58d3d2aSXin Li }
181*a58d3d2aSXin Li /* Split in a separate loop so the compiler can vectorize it */
182*a58d3d2aSXin Li for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
183*a58d3d2aSXin Li st->xcorr_features[i] /= ener_norm[i];
184*a58d3d2aSXin Li }
185*a58d3d2aSXin Li /*printf("\n");*/
186*a58d3d2aSXin Li }
187*a58d3d2aSXin Li st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
188*a58d3d2aSXin Li pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
189*a58d3d2aSXin Li xx = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
190*a58d3d2aSXin Li yy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
191*a58d3d2aSXin Li xy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
192*a58d3d2aSXin Li /*printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy));*/
193*a58d3d2aSXin Li frame_corr = xy/sqrt(1+xx*yy);
194*a58d3d2aSXin Li frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
195*a58d3d2aSXin Li st->features[NB_BANDS] = st->dnn_pitch;
196*a58d3d2aSXin Li st->features[NB_BANDS + 1] = frame_corr-.5f;
197*a58d3d2aSXin Li }
198*a58d3d2aSXin Li
preemphasis(float * y,float * mem,const float * x,float coef,int N)199*a58d3d2aSXin Li void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
200*a58d3d2aSXin Li int i;
201*a58d3d2aSXin Li for (i=0;i<N;i++) {
202*a58d3d2aSXin Li float yi;
203*a58d3d2aSXin Li yi = x[i] + *mem;
204*a58d3d2aSXin Li *mem = -coef*x[i];
205*a58d3d2aSXin Li y[i] = yi;
206*a58d3d2aSXin Li }
207*a58d3d2aSXin Li }
208*a58d3d2aSXin Li
lpcnet_compute_single_frame_features_impl(LPCNetEncState * st,float * x,float features[NB_TOTAL_FEATURES],int arch)209*a58d3d2aSXin Li static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch) {
210*a58d3d2aSXin Li preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
211*a58d3d2aSXin Li compute_frame_features(st, x, arch);
212*a58d3d2aSXin Li OPUS_COPY(features, &st->features[0], NB_TOTAL_FEATURES);
213*a58d3d2aSXin Li return 0;
214*a58d3d2aSXin Li }
215*a58d3d2aSXin Li
lpcnet_compute_single_frame_features(LPCNetEncState * st,const opus_int16 * pcm,float features[NB_TOTAL_FEATURES],int arch)216*a58d3d2aSXin Li int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch) {
217*a58d3d2aSXin Li int i;
218*a58d3d2aSXin Li float x[FRAME_SIZE];
219*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
220*a58d3d2aSXin Li lpcnet_compute_single_frame_features_impl(st, x, features, arch);
221*a58d3d2aSXin Li return 0;
222*a58d3d2aSXin Li }
223*a58d3d2aSXin Li
lpcnet_compute_single_frame_features_float(LPCNetEncState * st,const float * pcm,float features[NB_TOTAL_FEATURES],int arch)224*a58d3d2aSXin Li int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch) {
225*a58d3d2aSXin Li int i;
226*a58d3d2aSXin Li float x[FRAME_SIZE];
227*a58d3d2aSXin Li for (i=0;i<FRAME_SIZE;i++) x[i] = pcm[i];
228*a58d3d2aSXin Li lpcnet_compute_single_frame_features_impl(st, x, features, arch);
229*a58d3d2aSXin Li return 0;
230*a58d3d2aSXin Li }
231