xref: /aosp_15_r20/external/sonic/sonic_lite.c (revision b290403dc9d28f89f133eb7e190ea8185d440ecd)
1 /* Sonic library
2    Copyright 2010
3    Bill Cox
4    This file is part of the Sonic Library.
5 
6    This file is licensed under the Apache 2.0 license.
7 */
8 
9 /* This file is designed for low-powered microcontrollers, minimizing memory
10    compared to the fuller sonic.c implementation. */
11 
12 #include "sonic_lite.h"
13 
14 #include <string.h>
15 
/* Derived limits.  SONIC_SAMPLE_RATE, SONIC_MIN_PITCH, SONIC_MAX_PITCH,
   SONIC_AMDF_FREQ and SONIC_INPUT_SAMPLES are not defined in this file;
   presumably they come from sonic_lite.h. */

/* Longest pitch period, in samples, tried by the pitch search. */
#define SONIC_MAX_PERIOD (SONIC_SAMPLE_RATE / SONIC_MIN_PITCH)
/* Shortest pitch period, in samples, tried by the pitch search. */
#define SONIC_MIN_PERIOD (SONIC_SAMPLE_RATE / SONIC_MAX_PITCH)
/* Number of input samples averaged into one down-sampled sample for the
   coarse pitch search. */
#define SONIC_SKIP (SONIC_SAMPLE_RATE / SONIC_AMDF_FREQ)
/* Capacity of the input buffer, in samples. */
#define SONIC_INPUT_BUFFER_SIZE (2 * SONIC_MAX_PERIOD + SONIC_INPUT_SAMPLES)
20 
21 struct sonicStruct {
22   short inputBuffer[SONIC_INPUT_BUFFER_SIZE];
23   short outputBuffer [2 * SONIC_MAX_PERIOD];
24   short downSampleBuffer[(2 * SONIC_MAX_PERIOD) / SONIC_SKIP];
25   float speed;
26   float volume;
27   int numInputSamples;
28   int numOutputSamples;
29   int remainingInputToCopy;
30   int prevPeriod;
31   int prevMinDiff;
32 };
33 
34 static struct sonicStruct sonicStream;
35 
/* Scale the samples in place by the given volume factor.

   samples:    buffer of numSamples 16-bit samples, overwritten with the
               scaled values.
   numSamples: number of samples to scale; nothing is done when it is <= 0.
   volume:     gain.  It is clamped to [-127, 127] so that the fixed-point
               product of a 16-bit sample and the gain always fits in 32 bits.

   Results are saturated to [-32767, 32767]. */
static void scaleSamples(short *samples, int numSamples, float volume) {
  /* 24-bit integer and 8-bit fraction fixed-point representation.  long is
     used because int is only guaranteed to be 16 bits wide. */
  long fixedPointVolume;
  long value;
  int i;

  if (volume > 127.0f) {
    volume = 127.0f;
  } else if (volume < -127.0f) {
    volume = -127.0f;
  }
  fixedPointVolume = (long)(volume * 256.0f);
  for (i = 0; i < numSamples; i++) {
    /* Drop the 8 fraction bits.  This relies on >> being an arithmetic shift
       for negative values, which is implementation-defined in C. */
    value = (samples[i] * fixedPointVolume) >> 8;
    if (value > 32767) {
      value = 32767;
    } else if (value < -32767) {
      value = -32767;
    }
    samples[i] = (short)value;
  }
}
57 
58 /* Set the speed of the stream. */
sonicSetSpeed(float speed)59 void sonicSetSpeed(float speed) { sonicStream.speed = speed; }
60 
61 /* Set the scaling factor of the stream. */
sonicSetVolume(float volume)62 void sonicSetVolume(float volume) {
63   sonicStream.volume = volume;
64 }
65 
66 /* Create a sonic stream.  Return NULL only if we are out of memory and cannot
67    allocate the stream. */
sonicInit(void)68 void sonicInit(void) {
69   sonicStream.speed = 1.0;
70   sonicStream.volume = 1.0f;
71   sonicStream.numInputSamples = 0;;
72   sonicStream.numOutputSamples = 0;
73   sonicStream.remainingInputToCopy = 0;
74   sonicStream.prevPeriod = 0;
75   sonicStream.prevMinDiff = 0;
76 }
77 
78 /* Add the input samples to the input buffer. */
addShortSamplesToInputBuffer(short * samples,int numSamples)79 static int addShortSamplesToInputBuffer(short *samples,
80                                         int numSamples) {
81   if (numSamples == 0) {
82     return 1;
83   }
84   memcpy(sonicStream.inputBuffer + sonicStream.numInputSamples,
85          samples, numSamples * sizeof(short));
86   sonicStream.numInputSamples += numSamples;
87   return 1;
88 }
89 
90 /* Remove input samples that we have already processed. */
removeInputSamples(int position)91 static void removeInputSamples(int position) {
92   int remainingSamples = sonicStream.numInputSamples - position;
93 
94   if (remainingSamples > 0) {
95     memmove(sonicStream.inputBuffer,
96             sonicStream.inputBuffer + position,
97             remainingSamples * sizeof(short));
98   }
99   sonicStream.numInputSamples = remainingSamples;
100 }
101 
102 /* Just copy from the array to the output buffer */
copyToOutput(short * samples,int numSamples)103 static void copyToOutput(short *samples, int numSamples) {
104   memcpy(sonicStream.outputBuffer + sonicStream.numOutputSamples,
105          samples, numSamples * sizeof(short));
106   sonicStream.numOutputSamples += numSamples;
107 }
108 
109 /* Just copy from the input buffer to the output buffer. */
copyInputToOutput(int position)110 static int copyInputToOutput(int position) {
111   int numSamples = sonicStream.remainingInputToCopy;
112 
113   if (numSamples > 2 * SONIC_MAX_PERIOD) {
114     numSamples = 2 * SONIC_MAX_PERIOD;
115   }
116   copyToOutput(sonicStream.inputBuffer + position, numSamples);
117   sonicStream.remainingInputToCopy -= numSamples;
118   return numSamples;
119 }
120 
121 /* Read short data out of the stream.  Sometimes no data will be available, and
122    zero is returned, which is not an error condition. */
sonicReadShortFromStream(short * samples,int maxSamples)123 int sonicReadShortFromStream(short *samples, int maxSamples) {
124   int numSamples = sonicStream.numOutputSamples;
125   int remainingSamples = 0;
126 
127   if (numSamples == 0) {
128     return 0;
129   }
130   if (numSamples > maxSamples) {
131     remainingSamples = numSamples - maxSamples;
132     numSamples = maxSamples;
133   }
134   memcpy(samples, sonicStream.outputBuffer, numSamples * sizeof(short));
135   if (remainingSamples > 0) {
136     memmove(sonicStream.outputBuffer, sonicStream.outputBuffer + numSamples,
137             remainingSamples * sizeof(short));
138   }
139   sonicStream.numOutputSamples = remainingSamples;
140   return numSamples;
141 }
142 
143 /* Force the sonic stream to generate output using whatever data it currently
144    has.  No extra delay will be added to the output, but flushing in the middle
145    of words could introduce distortion. */
sonicFlushStream(void)146 void sonicFlushStream(void) {
147   int maxRequired = 2 * SONIC_MAX_PERIOD;
148   int remainingSamples = sonicStream.numInputSamples;
149   float speed = sonicStream.speed;
150   int expectedOutputSamples = sonicStream.numOutputSamples + (int)((remainingSamples / speed) + 0.5f);
151 
152   memset(sonicStream.inputBuffer + remainingSamples, 0,
153       sizeof(short) * (SONIC_INPUT_BUFFER_SIZE - remainingSamples));
154   sonicStream.numInputSamples += 2 * maxRequired;
155   sonicWriteShortToStream(NULL, 0);
156   /* Throw away any extra samples we generated due to the silence we added */
157   if (sonicStream.numOutputSamples > expectedOutputSamples) {
158     sonicStream.numOutputSamples = expectedOutputSamples;
159   }
160   /* Empty input buffer */
161   sonicStream.numInputSamples = 0;
162   sonicStream.remainingInputToCopy = 0;
163 }
164 
165 /* Return the number of samples in the output buffer */
sonicSamplesAvailable(void)166 int sonicSamplesAvailable(void) {
167   return sonicStream.numOutputSamples;
168 }
169 
170 /* If skip is greater than one, average skip samples together and write them to
171    the down-sample buffer. */
downSampleInput(short * samples)172 static void downSampleInput(short *samples) {
173   int numSamples = 2 * SONIC_MAX_PERIOD / SONIC_SKIP;
174   int i, j;
175   int value;
176   short *downSamples = sonicStream.downSampleBuffer;
177 
178   for (i = 0; i < numSamples; i++) {
179     value = 0;
180     for (j = 0; j < SONIC_SKIP; j++) {
181       value += *samples++;
182     }
183     value /= SONIC_SKIP;
184     *downSamples++ = value;
185   }
186 }
187 
/* Find the best pitch period in [minPeriod, maxPeriod] using the average
   magnitude difference between the signal and itself shifted by the period.

   samples:    at least 2 * maxPeriod samples.
   minPeriod:  smallest period to try.
   maxPeriod:  largest period to try.
   retMinDiff: receives the average difference per sample at the best period.
   retMaxDiff: receives the average difference per sample at the worst period.

   Returns the best period.  If the range is empty (minPeriod > maxPeriod),
   returns 0 and sets both differences to 0. */
static int findPitchPeriodInRange(short *samples, int minPeriod, int maxPeriod,
                                  int* retMinDiff, int* retMaxDiff) {
  int period, bestPeriod = 0, worstPeriod = 255;
  short *s;
  short *p;
  long delta;
  unsigned long diff, minDiff = 1, maxDiff = 0;
  int i;

  if (minPeriod > maxPeriod) {
    /* Nothing to search; avoid dividing by bestPeriod == 0 below. */
    *retMinDiff = 0;
    *retMaxDiff = 0;
    return 0;
  }
  for (period = minPeriod; period <= maxPeriod; period++) {
    diff = 0;
    s = samples;
    p = samples + period;
    for (i = 0; i < period; i++) {
      /* Subtract in long so the difference of two 16-bit samples cannot
         overflow a 16-bit int. */
      delta = (long)*s++ - (long)*p++;
      diff += (unsigned long)(delta >= 0 ? delta : -delta);
    }
    /* Note that the highest number of samples we add into diff will be less
       than 256, since we skip samples.  Thus, diff is a 24 bit number, and
       we can safely multiply by the period without overflow.  The
       cross-multiplication compares diff / period ratios without dividing. */
    if (bestPeriod == 0 || diff * bestPeriod < minDiff * period) {
      minDiff = diff;
      bestPeriod = period;
    }
    if (diff * worstPeriod > maxDiff * period) {
      maxDiff = diff;
      worstPeriod = period;
    }
  }
  *retMinDiff = minDiff / bestPeriod;
  *retMaxDiff = maxDiff / worstPeriod;
  return bestPeriod;
}
225 
226 /* At abrupt ends of voiced words, we can have pitch periods that are better
227    approximated by the previous pitch period estimate.  Try to detect this case.  */
prevPeriodBetter(int minDiff,int maxDiff,int preferNewPeriod)228 static int prevPeriodBetter(int minDiff, int maxDiff, int preferNewPeriod) {
229   if (minDiff == 0 || sonicStream.prevPeriod == 0) {
230     return 0;
231   }
232   if (preferNewPeriod) {
233     if (maxDiff > minDiff * 3) {
234       /* Got a reasonable match this period */
235       return 0;
236     }
237     if (minDiff * 2 <= sonicStream.prevMinDiff * 3) {
238       /* Mismatch is not that much greater this period */
239       return 0;
240     }
241   } else {
242     if (minDiff <= sonicStream.prevMinDiff) {
243       return 0;
244     }
245   }
246   return 1;
247 }
248 
249 /* Find the pitch period.  This is a critical step, and we may have to try
250    multiple ways to get a good answer.  This version uses Average Magnitude
251    Difference Function (AMDF).  To improve speed, we down sample by an integer
252    factor get in the 11KHz range, and then do it again with a narrower
253    frequency range without down sampling */
findPitchPeriod(short * samples,int preferNewPeriod)254 static int findPitchPeriod(short *samples, int preferNewPeriod) {
255   int minPeriod = SONIC_MIN_PERIOD;
256   int maxPeriod = SONIC_MAX_PERIOD;
257   int minDiff, maxDiff, retPeriod;
258   int period;
259 
260   if (SONIC_SKIP == 1) {
261     period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff);
262   } else {
263     downSampleInput(samples);
264     period = findPitchPeriodInRange(sonicStream.downSampleBuffer, minPeriod / SONIC_SKIP,
265                                     maxPeriod / SONIC_SKIP, &minDiff, &maxDiff);
266     period *= SONIC_SKIP;
267     minPeriod = period - (SONIC_SKIP << 2);
268     maxPeriod = period + (SONIC_SKIP << 2);
269     if (minPeriod < SONIC_MIN_PERIOD) {
270       minPeriod = SONIC_MIN_PERIOD;
271     }
272     if (maxPeriod > SONIC_MAX_PERIOD) {
273       maxPeriod = SONIC_MAX_PERIOD;
274     }
275     period = findPitchPeriodInRange(samples, minPeriod, maxPeriod, &minDiff, &maxDiff);
276   }
277   if (prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) {
278     retPeriod = sonicStream.prevPeriod;
279   } else {
280     retPeriod = period;
281   }
282   sonicStream.prevMinDiff = minDiff;
283   sonicStream.prevPeriod = period;
284   return retPeriod;
285 }
286 
/* Overlap two sound segments, ramp the volume of one down, while ramping the
   other one from zero up, and add them, storing the result at the output.

   numSamples: length of each segment; nothing is written when it is <= 0.
   out:        receives numSamples cross-faded samples.
   rampDown:   segment whose weight falls linearly from 1 towards 0.
   rampUp:     segment whose weight rises linearly from 0 towards 1. */
static void overlapAdd(int numSamples, short *out, short *rampDown, short *rampUp) {
  short *o;
  short *u;
  short *d;
  int t;

  o = out;
  u = rampUp;
  d = rampDown;
  for (t = 0; t < numSamples; t++) {
    /* Widen to long: a 16-bit sample times the ramp weight does not fit in
       an int that is only 16 bits wide. */
    *o = (short)(((long)*d * (numSamples - t) + (long)*u * t) / numSamples);
    o++;
    d++;
    u++;
  }
}
305 
306 /* Skip over a pitch period, and copy period/speed samples to the output */
skipPitchPeriod(short * samples,float speed,int period)307 static int skipPitchPeriod(short *samples, float speed, int period) {
308   long newSamples;
309 
310   if (speed >= 2.0f) {
311     newSamples = period / (speed - 1.0f);
312   } else {
313     newSamples = period;
314     sonicStream.remainingInputToCopy = period * (2.0f - speed) / (speed - 1.0f);
315   }
316   overlapAdd(newSamples, sonicStream.outputBuffer + sonicStream.numOutputSamples,
317       samples, samples + period);
318   sonicStream.numOutputSamples += newSamples;
319   return newSamples;
320 }
321 
322 /* Resample as many pitch periods as we have buffered on the input. */
changeSpeed(float speed)323 static void changeSpeed(float speed) {
324   short *samples;
325   int numSamples = sonicStream.numInputSamples;
326   int position = 0, period, newSamples;
327   int maxRequired = 2 * SONIC_MAX_PERIOD;
328 
329   /* printf("Changing speed to %f\n", speed); */
330   if (sonicStream.numInputSamples < maxRequired) {
331     return;
332   }
333   do {
334     if (sonicStream.remainingInputToCopy > 0) {
335       newSamples = copyInputToOutput(position);
336       position += newSamples;
337     } else {
338       samples = sonicStream.inputBuffer + position;
339       period = findPitchPeriod(samples, 1);
340       newSamples = skipPitchPeriod(samples, speed, period);
341       position += period + newSamples;
342     }
343   } while (position + maxRequired <= numSamples);
344   removeInputSamples(position);
345 }
346 
347 /* Resample as many pitch periods as we have buffered on the input.  Also scale
348    the output by the volume. */
processStreamInput(void)349 static void processStreamInput(void) {
350   int originalNumOutputSamples = sonicStream.numOutputSamples;
351   float speed = sonicStream.speed;
352 
353   if (speed > 1.00001) {
354     changeSpeed(speed);
355   } else {
356     copyToOutput(sonicStream.inputBuffer, sonicStream.numInputSamples);
357     sonicStream.numInputSamples = 0;
358   }
359   if (sonicStream.volume != 1.0f) {
360     /* Adjust output volume. */
361     scaleSamples( sonicStream.outputBuffer + originalNumOutputSamples,
362         (sonicStream.numOutputSamples - originalNumOutputSamples), sonicStream.volume);
363   }
364 }
365 
/* Write short samples to the stream: append them to the input buffer and
   process whatever input is buffered.  Calling with numSamples == 0 only
   triggers processing. */
void sonicWriteShortToStream(short *samples, int numSamples) {
  addShortSamplesToInputBuffer(samples, numSamples);
  processStreamInput();
}
372