xref: /aosp_15_r20/external/libopus/dnn/fargan.c (revision a58d3d2adb790c104798cd88c8a3aff4fa8b82cc)
1 /* Copyright (c) 2023 Amazon */
2 /*
3    Redistribution and use in source and binary forms, with or without
4    modification, are permitted provided that the following conditions
5    are met:
6 
7    - Redistributions of source code must retain the above copyright
8    notice, this list of conditions and the following disclaimer.
9 
10    - Redistributions in binary form must reproduce the above copyright
11    notice, this list of conditions and the following disclaimer in the
12    documentation and/or other materials provided with the distribution.
13 
14    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
18    OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
22    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26 
27 #ifdef HAVE_CONFIG_H
28 #include "config.h"
29 #endif
30 
31 #include "fargan.h"
32 #include "os_support.h"
33 #include "freq.h"
34 #include "fargan_data.h"
35 #include "lpcnet.h"
36 #include "pitch.h"
37 #include "nnet.h"
38 #include "lpcnet_private.h"
39 #include "cpu_support.h"
40 
41 #define FARGAN_FEATURES (NB_FEATURES)
42 
/* Runs the conditioning network for one frame.
   The frame features are concatenated with a pitch-embedding row selected by
   `period`, then pushed through dense -> conv1d -> dense (all tanh).
     st        synthesis state; supplies the model, the conv1d history
               (cond_conv1_state) and the arch flags
     cond      receives the output of cond_net_fdense2
     features  NB_FEATURES values for the current frame
     period    pitch period used to pick the embedding row */
static void compute_fargan_cond(FARGANState *st, float *cond, const float *features, int period)
{
  float feat_and_embed[NB_FEATURES+COND_NET_PEMBED_OUT_SIZE];
  float dense1_out[COND_NET_FCONV1_IN_SIZE];
  float conv1_out[COND_NET_FCONV1_OUT_SIZE];
  const float *embedding;
  int embed_row;
  FARGAN *net = &st->model;

  /* The stack buffers above are sized from compile-time constants; make sure
     the loaded layers agree with them. */
  celt_assert(FARGAN_FEATURES+COND_NET_PEMBED_OUT_SIZE == net->cond_net_fdense1.nb_inputs);
  celt_assert(COND_NET_FCONV1_IN_SIZE == net->cond_net_fdense1.nb_outputs);
  celt_assert(COND_NET_FCONV1_OUT_SIZE == net->cond_net_fconv1.nb_outputs);

  /* Embedding row is period-32 saturated to 0..223.
     NOTE(review): 32 and 223 look like the minimum pitch period and the last
     table row -- confirm against the model definition. */
  embed_row = IMIN(period-32, 223);
  embed_row = IMAX(0, embed_row);
  embedding = &net->cond_net_pembed.float_weights[embed_row*COND_NET_PEMBED_OUT_SIZE];

  /* Dense input layout: [ features | pitch embedding ]. */
  OPUS_COPY(feat_and_embed, features, NB_FEATURES);
  OPUS_COPY(&feat_and_embed[NB_FEATURES], embedding, COND_NET_PEMBED_OUT_SIZE);

  compute_generic_dense(&net->cond_net_fdense1, dense1_out, feat_and_embed, ACTIVATION_TANH, st->arch);
  compute_generic_conv1d(&net->cond_net_fconv1, conv1_out, st->cond_conv1_state, dense1_out, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH, st->arch);
  compute_generic_dense(&net->cond_net_fdense2, cond, conv1_out, ACTIVATION_TANH, st->arch);
}
60 
fargan_deemphasis(float * pcm,float * deemph_mem)61 static void fargan_deemphasis(float *pcm, float *deemph_mem) {
62   int i;
63   for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) {
64     pcm[i] += FARGAN_DEEMPHASIS * *deemph_mem;
65     *deemph_mem = pcm[i];
66   }
67 }
68 
/* Length of the vector assembled for sig_net_skip_dense, in the order it is
   filled below: gru1 GLU output, gru2 GLU output, gru3 GLU output, fwc0 GLU
   output, gated pitch prediction, previous subframe. */
#define FARGAN_SKIP_CAT_SIZE (SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE)

/* Generates one subframe (FARGAN_SUBFRAME_SIZE samples) of output.
     st      synthesis state; must already be primed (cont_initialized set).
             The conv memory and GRU state buffers are handed to the layer
             routines, pitch_buf is shifted and extended with the new
             subframe, and deemph_mem is advanced.
     pcm     receives the de-emphasised output samples
     cond    conditioning vector for this subframe
     period  pitch period used to fetch the pitch prediction from pitch_buf;
             values below 1 are treated as 1 */
static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond, int period)
{
  int i, pos;
  float fwc0_in[SIG_NET_INPUT_SIZE];
  float gru1_in[SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float gru2_in[SIG_NET_GRU1_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float gru3_in[SIG_NET_GRU2_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
  float pred[FARGAN_SUBFRAME_SIZE+4];
  float prev[FARGAN_SUBFRAME_SIZE];
  float pitch_gate[4];
  float gain;
  float gain_1;
  /* Sized to exactly what is written below rather than a fixed 10000 floats
     (~40 kB of stack per call). */
  float skip_cat[FARGAN_SKIP_CAT_SIZE];
  float skip_out[SIG_NET_SKIP_DENSE_OUT_SIZE];
  FARGAN *model;

  celt_assert(st->cont_initialized);
  model = &st->model;
  /* The skip dense layer must consume exactly the vector assembled here. */
  celt_assert(FARGAN_SKIP_CAT_SIZE == model->sig_net_skip_dense.nb_inputs);

  /* pitch_buf is indexed as PITCH_MAX_PERIOD samples long. With period <= 0
     the wrap-around below (pos -= period) could never pull pos back from
     PITCH_MAX_PERIOD, so the reads would run past the end of the buffer.
     Force a period of at least one sample. */
  if (period < 1) period = 1;

  /* Per-subframe gain (predicted in the log domain) and its regularised
     inverse, used to normalise the signal history fed to the network. */
  compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR, st->arch);
  gain = exp(gain);
  gain_1 = 1.f/(1e-5f + gain);

  /* Pitch prediction: FARGAN_SUBFRAME_SIZE+4 samples starting period+2
     samples before the end of pitch_buf, stepping back by one period each
     time the end is reached. Indices below 0 read sample 0. Values are
     normalised by the gain and clipped to [-1, 1]. */
  pos = PITCH_MAX_PERIOD-period-2;
  for (i=0;i<FARGAN_SUBFRAME_SIZE+4;i++) {
    pred[i] = MIN32(1.f, MAX32(-1.f, gain_1*st->pitch_buf[IMAX(0, pos)]));
    pos++;
    if (pos == PITCH_MAX_PERIOD) pos -= period;
  }
  /* Previous subframe, normalised and clipped the same way. */
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) prev[i] = MAX32(-1.f, MIN32(1.f, gain_1*st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE+i]));

  /* fwc0 input layout: [ cond | pred (with 2 extra samples each side) | prev ]. */
  OPUS_COPY(&fwc0_in[0], &cond[0], FARGAN_COND_SIZE);
  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE], pred, FARGAN_SUBFRAME_SIZE+4);
  OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE+FARGAN_SUBFRAME_SIZE+4], prev, FARGAN_SUBFRAME_SIZE);

  compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH, st->arch);
  celt_assert(SIG_NET_FWC0_GLU_GATE_OUT_SIZE == model->sig_net_fwc0_glu_gate.nb_outputs);
  compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in, st->arch);

  /* Four sigmoid gates, one per place the pitch prediction is injected. */
  compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID, st->arch);

  /* Each GRU input is [ previous layer output | gate*pred | prev ]; pred is
     read with an offset of 2 to skip the leading margin samples. */
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+i] = pitch_gate[0]*pred[i+2];
  OPUS_COPY(&gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in, st->arch);
  compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state, st->arch);

  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru2_in[SIG_NET_GRU1_OUT_SIZE+i] = pitch_gate[1]*pred[i+2];
  OPUS_COPY(&gru2_in[SIG_NET_GRU1_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in, st->arch);
  compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state, st->arch);

  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru3_in[SIG_NET_GRU2_OUT_SIZE+i] = pitch_gate[2]*pred[i+2];
  OPUS_COPY(&gru3_in[SIG_NET_GRU2_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
  compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in, st->arch);
  /* The gru3 GLU output is written straight into its slot of skip_cat. */
  compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state, st->arch);

  /* Fill the remaining skip_cat slots. The heads of gru2_in / gru3_in /
     gru1_in still hold the gru1 / gru2 / fwc0 GLU outputs respectively. */
  OPUS_COPY(skip_cat, gru2_in, SIG_NET_GRU1_OUT_SIZE);
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE], gru3_in, SIG_NET_GRU2_OUT_SIZE);
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE], gru1_in, SIG_NET_FWC0_CONV_OUT_SIZE);
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+i] = pitch_gate[3]*pred[i+2];
  OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);

  compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH, st->arch);
  compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out, st->arch);

  /* Output layer produces a normalised subframe; undo the normalisation. */
  compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH, st->arch);
  for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) pcm[i] *= gain;

  /* Slide the history by one subframe and append the new samples before
     de-emphasis is applied to the caller's copy. */
  OPUS_MOVE(st->pitch_buf, &st->pitch_buf[FARGAN_SUBFRAME_SIZE], PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE);
  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], pcm, FARGAN_SUBFRAME_SIZE);
  fargan_deemphasis(pcm, &st->deemph_mem);
}
141 
/* Primes the synthesis state from already-known signal so that subsequent
   synthesis continues from it.
     st         state to prime; cont_initialized is set by this call
     pcm0       FARGAN_CONT_SAMPLES samples of past signal
     features0  5 consecutive feature frames (5*NB_FEATURES floats) */
void fargan_cont(FARGANState *st, const float *pcm0, const float *features0)
{
  float cond[COND_NET_FDENSE2_OUT_SIZE];
  float preemph[FARGAN_CONT_SAMPLES];
  float discard[FARGAN_SUBFRAME_SIZE];
  const float *frame_feat;
  int period;
  int n;

  /* Pre-load features. */
  /* Only the conditioning from the last frame survives in cond, while
     last_period ends up holding the period of the frame before it. */
  period = 0;
  for (n=0;n<5;n++) {
    frame_feat = features0 + n*NB_FEATURES;
    st->last_period = period;
    period = (int)floor(.5+256./pow(2.f,((1./60.)*((frame_feat[NB_BANDS]+1.5)*60))));
    compute_fargan_cond(st, cond, frame_feat, period);
  }

  /* First-order difference of the input, i.e. the inverse of the filter in
     fargan_deemphasis(); the first sample has no predecessor and is zeroed. */
  preemph[0] = 0;
  n = 1;
  while (n < FARGAN_CONT_SAMPLES) {
    preemph[n] = pcm0[n] - FARGAN_DEEMPHASIS*pcm0[n-1];
    n++;
  }

  /* The first frame of the filtered signal becomes the tail of the history. */
  OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_FRAME_SIZE], preemph, FARGAN_FRAME_SIZE);
  st->cont_initialized = 1;

  /* Run the network over the following subframes, throwing its output away
     and overwriting the newest history slot with the known signal instead. */
  for (n=0;n<FARGAN_NB_SUBFRAMES;n++) {
    const float *known = &preemph[FARGAN_FRAME_SIZE+n*FARGAN_SUBFRAME_SIZE];
    run_fargan_subframe(st, discard, &cond[n*FARGAN_COND_SIZE], st->last_period);
    OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], known, FARGAN_SUBFRAME_SIZE);
  }
  /* De-emphasis resumes from the last known output sample. */
  st->deemph_mem = pcm0[FARGAN_CONT_SAMPLES-1];
}
172 
173 
/* Resets *st to all zeros, selects the CPU architecture, and -- unless the
   build defines USE_WEIGHTS_FILE -- initialises the model from the built-in
   fargan_arrays table. A failed model initialisation trips celt_assert. */
void fargan_init(FARGANState *st)
{
  int status = 0;
  OPUS_CLEAR(st, 1);
  st->arch = opus_select_arch();
#ifndef USE_WEIGHTS_FILE
  status = init_fargan(&st->model, fargan_arrays);
#endif
  celt_assert(status == 0);
}
186 
/* Initialises the model in *st from a serialized weight blob.
     st    state whose model is (re)initialised
     data  start of the weight blob
     len   size of the blob in bytes
   Returns 0 on success and -1 if the blob cannot be parsed or the model
   cannot be initialised from it.
   NOTE(review): the model presumably keeps pointers into `data`, in which
   case the blob must outlive the state -- confirm against init_fargan(). */
int fargan_load_model(FARGANState *st, const void *data, int len) {
  WeightArray *list = NULL;
  int ret;
  /* parse_weights() is declared elsewhere; it is expected to report failure
     with a negative return and/or by leaving the list NULL. Do not hand such
     a list to init_fargan(). */
  if (parse_weights(&list, data, len) < 0 || list == NULL) {
    opus_free(list);
    return -1;
  }
  ret = init_fargan(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
196 
/* Synthesizes one frame (FARGAN_NB_SUBFRAMES subframes) into pcm.
   The conditioning is computed with the period derived from this frame's
   features, but the subframes themselves are generated with the period
   stored by the previous call; the new period is stored for the next one.
   The state must already have been primed (cont_initialized). */
static void fargan_synthesize_impl(FARGANState *st, float *pcm, const float *features)
{
  float cond[COND_NET_FDENSE2_OUT_SIZE];
  int new_period;
  int sf;
  celt_assert(st->cont_initialized);

  new_period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
  compute_fargan_cond(st, cond, features, new_period);
  for (sf=0;sf<FARGAN_NB_SUBFRAMES;sf++) {
    run_fargan_subframe(st, pcm + sf*FARGAN_SUBFRAME_SIZE, cond + sf*FARGAN_COND_SIZE, st->last_period);
  }
  st->last_period = new_period;
}
213 
/* Public float entry point: synthesizes one frame of samples into pcm from
   one frame of features. Thin wrapper around fargan_synthesize_impl(). */
void fargan_synthesize(FARGANState *st, float *pcm, const float *features)
{
  fargan_synthesize_impl(st, pcm, features);
}
218 
/* 16-bit entry point: synthesizes one frame in float, then scales by 32768,
   clamps to [-32767, 32767] and rounds to nearest into pcm.
   NOTE(review): the scratch buffer holds FARGAN_FRAME_SIZE samples while the
   conversion loop runs over LPCNET_FRAME_SIZE -- confirm the two constants
   are equal. */
void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features)
{
  float fpcm[FARGAN_FRAME_SIZE];
  int n;
  fargan_synthesize(st, fpcm, features);
  for (n=0;n<LPCNET_FRAME_SIZE;n++) {
    float scaled = 32768.f*fpcm[n];
    scaled = MAX32(-32767, scaled);
    scaled = MIN32(32767, scaled);
    pcm[n] = (int)floor(.5 + scaled);
  }
}
226