xref: /aosp_15_r20/external/sonic/sonic.h (revision b290403dc9d28f89f133eb7e190ea8185d440ecd)
1*b290403dSRicardo Garcia #ifndef SONIC_H_
2*b290403dSRicardo Garcia #define SONIC_H_
3*b290403dSRicardo Garcia 
4*b290403dSRicardo Garcia /* Sonic library
5*b290403dSRicardo Garcia    Copyright 2010
6*b290403dSRicardo Garcia    Bill Cox
7*b290403dSRicardo Garcia    This file is part of the Sonic Library.
8*b290403dSRicardo Garcia 
9*b290403dSRicardo Garcia    This file is licensed under the Apache 2.0 license.
10*b290403dSRicardo Garcia */
11*b290403dSRicardo Garcia 
12*b290403dSRicardo Garcia /*
13*b290403dSRicardo Garcia The Sonic Library implements a new algorithm invented by Bill Cox for the
14*b290403dSRicardo Garcia specific purpose of speeding up speech by high factors at high quality.  It
15*b290403dSRicardo Garcia generates smooth speech at speed up factors as high as 6X, possibly more.  It is
16*b290403dSRicardo Garcia also capable of slowing down speech, and generates high quality results
17*b290403dSRicardo Garcia regardless of the speed up or slow down factor.  For speeding up speech by 2X or
18*b290403dSRicardo Garcia more, the following equation is used:
19*b290403dSRicardo Garcia 
20*b290403dSRicardo Garcia     newSamples = period/(speed - 1.0)
21*b290403dSRicardo Garcia     scale = 1.0/newSamples;
22*b290403dSRicardo Garcia 
23*b290403dSRicardo Garcia where period is the current pitch period, determined using AMDF or any other
24*b290403dSRicardo Garcia pitch estimator, and speed is the speedup factor.  If the current position in
25*b290403dSRicardo Garcia the input stream is pointed to by "samples", and the current output stream
26*b290403dSRicardo Garcia position is pointed to by "out", then newSamples number of samples can be
27*b290403dSRicardo Garcia generated with:
28*b290403dSRicardo Garcia 
29*b290403dSRicardo Garcia     out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;
30*b290403dSRicardo Garcia 
31*b290403dSRicardo Garcia where t = 0 to newSamples - 1.
32*b290403dSRicardo Garcia 
33*b290403dSRicardo Garcia For speed factors < 2X, the PICOLA algorithm is used.  The above
34*b290403dSRicardo Garcia algorithm is first used to double the speed of one pitch period.  Then, enough
35*b290403dSRicardo Garcia input is directly copied from the input to the output to achieve the desired
36*b290403dSRicardo Garcia speed up factor, where 1.0 < speed < 2.0.  The amount of data copied is derived:
37*b290403dSRicardo Garcia 
38*b290403dSRicardo Garcia     speed = (2*period + length)/(period + length)
39*b290403dSRicardo Garcia     speed*length + speed*period = 2*period + length
40*b290403dSRicardo Garcia     length(speed - 1) = 2*period - speed*period
41*b290403dSRicardo Garcia     length = period*(2 - speed)/(speed - 1)
42*b290403dSRicardo Garcia 
43*b290403dSRicardo Garcia For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
44*b290403dSRicardo Garcia the output twice, and length of input is copied from the input to the output
45*b290403dSRicardo Garcia until the output desired speed is reached.  The length of data copied is:
46*b290403dSRicardo Garcia 
47*b290403dSRicardo Garcia     length = period*(speed - 0.5)/(1 - speed)
48*b290403dSRicardo Garcia 
49*b290403dSRicardo Garcia For slow down factors below 0.5, no data is copied, and an algorithm
50*b290403dSRicardo Garcia similar to high speed factors is used.
51*b290403dSRicardo Garcia */
52*b290403dSRicardo Garcia 
53*b290403dSRicardo Garcia /* Uncomment this to use sine-wave based overlap add which in theory can improve
54*b290403dSRicardo Garcia    sound quality slightly, at the expense of lots of floating point math. */
55*b290403dSRicardo Garcia /* #define SONIC_USE_SIN */
56*b290403dSRicardo Garcia 
57*b290403dSRicardo Garcia #ifdef __cplusplus
58*b290403dSRicardo Garcia extern "C" {
59*b290403dSRicardo Garcia #endif
60*b290403dSRicardo Garcia 
61*b290403dSRicardo Garcia #ifdef SONIC_INTERNAL
62*b290403dSRicardo Garcia /* The following #define's are used to change the names of the routines defined
63*b290403dSRicardo Garcia  * here so that a new library (i.e. speedy) can reuse these names, and then call
64*b290403dSRicardo Garcia  * the original names.  We do this for two reasons: 1) we don't want to change
65*b290403dSRicardo Garcia  * the original API, and 2) we want to add a shim, using the original names and
66*b290403dSRicardo Garcia  * still call these routines.
67*b290403dSRicardo Garcia  *
68*b290403dSRicardo Garcia  * Original users of this API and the libsonic library need to do nothing.  The
69*b290403dSRicardo Garcia  * original behavior remains.
70*b290403dSRicardo Garcia  *
71*b290403dSRicardo Garcia  * A new user that adds additional functionality on top of this library (a shim)
72*b290403dSRicardo Garcia  * should #define SONIC_INTERNAL before including this file, undefine all these
73*b290403dSRicardo Garcia  * symbols and call the sonicIntXXX functions directly.
74*b290403dSRicardo Garcia  */
75*b290403dSRicardo Garcia #define sonicCreateStream sonicIntCreateStream
76*b290403dSRicardo Garcia #define sonicDestroyStream sonicIntDestroyStream
77*b290403dSRicardo Garcia #define sonicWriteFloatToStream sonicIntWriteFloatToStream
78*b290403dSRicardo Garcia #define sonicWriteShortToStream sonicIntWriteShortToStream
79*b290403dSRicardo Garcia #define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream
80*b290403dSRicardo Garcia #define sonicReadFloatFromStream sonicIntReadFloatFromStream
81*b290403dSRicardo Garcia #define sonicReadShortFromStream sonicIntReadShortFromStream
82*b290403dSRicardo Garcia #define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream
83*b290403dSRicardo Garcia #define sonicFlushStream sonicIntFlushStream
84*b290403dSRicardo Garcia #define sonicSamplesAvailable sonicIntSamplesAvailable
85*b290403dSRicardo Garcia #define sonicGetSpeed sonicIntGetSpeed
86*b290403dSRicardo Garcia #define sonicSetSpeed sonicIntSetSpeed
87*b290403dSRicardo Garcia #define sonicGetPitch sonicIntGetPitch
88*b290403dSRicardo Garcia #define sonicSetPitch sonicIntSetPitch
89*b290403dSRicardo Garcia #define sonicGetRate sonicIntGetRate
90*b290403dSRicardo Garcia #define sonicSetRate sonicIntSetRate
91*b290403dSRicardo Garcia #define sonicGetVolume sonicIntGetVolume
92*b290403dSRicardo Garcia #define sonicSetVolume sonicIntSetVolume
93*b290403dSRicardo Garcia #define sonicGetQuality sonicIntGetQuality
94*b290403dSRicardo Garcia #define sonicSetQuality sonicIntSetQuality
95*b290403dSRicardo Garcia #define sonicGetSampleRate sonicIntGetSampleRate
96*b290403dSRicardo Garcia #define sonicSetSampleRate sonicIntSetSampleRate
97*b290403dSRicardo Garcia #define sonicGetNumChannels sonicIntGetNumChannels
98*b290403dSRicardo Garcia #define sonicGetUserData sonicIntGetUserData
99*b290403dSRicardo Garcia #define sonicSetUserData sonicIntSetUserData
100*b290403dSRicardo Garcia #define sonicSetNumChannels sonicIntSetNumChannels
101*b290403dSRicardo Garcia #define sonicChangeFloatSpeed sonicIntChangeFloatSpeed
102*b290403dSRicardo Garcia #define sonicChangeShortSpeed sonicIntChangeShortSpeed
103*b290403dSRicardo Garcia #define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup
104*b290403dSRicardo Garcia #define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength
105*b290403dSRicardo Garcia #define sonicComputeSpectrogram sonicIntComputeSpectrogram
106*b290403dSRicardo Garcia #define sonicGetSpectrogram sonicIntGetSpectrogram
107*b290403dSRicardo Garcia 
108*b290403dSRicardo Garcia #endif /* SONIC_INTERNAL */
109*b290403dSRicardo Garcia 
110*b290403dSRicardo Garcia /* This specifies the range of voice pitches we try to match.
111*b290403dSRicardo Garcia    Note that if we go lower than 65, we could overflow in findPitchInRange */
112*b290403dSRicardo Garcia #ifndef SONIC_MIN_PITCH
113*b290403dSRicardo Garcia #define SONIC_MIN_PITCH 65
114*b290403dSRicardo Garcia #endif  /* SONIC_MIN_PITCH */
115*b290403dSRicardo Garcia #ifndef SONIC_MAX_PITCH
116*b290403dSRicardo Garcia #define SONIC_MAX_PITCH 400
117*b290403dSRicardo Garcia #endif  /* SONIC_MAX_PITCH */
118*b290403dSRicardo Garcia 
119*b290403dSRicardo Garcia /* These are used to down-sample some inputs to improve speed */
120*b290403dSRicardo Garcia #define SONIC_AMDF_FREQ 4000
121*b290403dSRicardo Garcia 
122*b290403dSRicardo Garcia struct sonicStreamStruct;
123*b290403dSRicardo Garcia typedef struct sonicStreamStruct* sonicStream;
124*b290403dSRicardo Garcia 
125*b290403dSRicardo Garcia /* For all of the following functions, numChannels is multiplied by numSamples
126*b290403dSRicardo Garcia    to determine the actual number of values read or returned. */
127*b290403dSRicardo Garcia 
128*b290403dSRicardo Garcia /* Create a sonic stream.  Return NULL only if we are out of memory and cannot
129*b290403dSRicardo Garcia   allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
130*b290403dSRicardo Garcia sonicStream sonicCreateStream(int sampleRate, int numChannels);
131*b290403dSRicardo Garcia /* Destroy the sonic stream. */
132*b290403dSRicardo Garcia void sonicDestroyStream(sonicStream stream);
133*b290403dSRicardo Garcia /* Attach user data to the stream. */
134*b290403dSRicardo Garcia void sonicSetUserData(sonicStream stream, void *userData);
135*b290403dSRicardo Garcia /* Retrieve user data attached to the stream. */
136*b290403dSRicardo Garcia void *sonicGetUserData(sonicStream stream);
137*b290403dSRicardo Garcia /* Use this to write floating point data to be sped up or slowed down into the
138*b290403dSRicardo Garcia    stream.  Values must be between -1 and 1.  Return 0 if memory realloc
139*b290403dSRicardo Garcia    failed, otherwise 1. */
140*b290403dSRicardo Garcia int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples);
141*b290403dSRicardo Garcia /* Use this to write 16-bit data to be sped up or slowed down into the stream.
142*b290403dSRicardo Garcia    Return 0 if memory realloc failed, otherwise 1. */
143*b290403dSRicardo Garcia int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples);
144*b290403dSRicardo Garcia /* Use this to write 8-bit unsigned data to be sped up or slowed down into the
145*b290403dSRicardo Garcia    stream.  Return 0 if memory realloc failed, otherwise 1. */
146*b290403dSRicardo Garcia int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples,
147*b290403dSRicardo Garcia                                    int numSamples);
148*b290403dSRicardo Garcia /* Use this to read floating point data out of the stream.  Sometimes no data
149*b290403dSRicardo Garcia    will be available, and zero is returned, which is not an error condition. */
150*b290403dSRicardo Garcia int sonicReadFloatFromStream(sonicStream stream, float* samples,
151*b290403dSRicardo Garcia                              int maxSamples);
152*b290403dSRicardo Garcia /* Use this to read 16-bit data out of the stream.  Sometimes no data will
153*b290403dSRicardo Garcia    be available, and zero is returned, which is not an error condition. */
154*b290403dSRicardo Garcia int sonicReadShortFromStream(sonicStream stream, short* samples,
155*b290403dSRicardo Garcia                              int maxSamples);
156*b290403dSRicardo Garcia /* Use this to read 8-bit unsigned data out of the stream.  Sometimes no data
157*b290403dSRicardo Garcia    will be available, and zero is returned, which is not an error condition. */
158*b290403dSRicardo Garcia int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
159*b290403dSRicardo Garcia                                     int maxSamples);
160*b290403dSRicardo Garcia /* Force the sonic stream to generate output using whatever data it currently
161*b290403dSRicardo Garcia    has.  No extra delay will be added to the output, but flushing in the middle
162*b290403dSRicardo Garcia    of words could introduce distortion. */
163*b290403dSRicardo Garcia int sonicFlushStream(sonicStream stream);
164*b290403dSRicardo Garcia /* Return the number of samples in the output buffer */
165*b290403dSRicardo Garcia int sonicSamplesAvailable(sonicStream stream);
166*b290403dSRicardo Garcia /* Get the speed of the stream. */
167*b290403dSRicardo Garcia float sonicGetSpeed(sonicStream stream);
168*b290403dSRicardo Garcia /* Set the speed of the stream. */
169*b290403dSRicardo Garcia void sonicSetSpeed(sonicStream stream, float speed);
170*b290403dSRicardo Garcia /* Get the pitch of the stream. */
171*b290403dSRicardo Garcia float sonicGetPitch(sonicStream stream);
172*b290403dSRicardo Garcia /* Set the pitch of the stream. */
173*b290403dSRicardo Garcia void sonicSetPitch(sonicStream stream, float pitch);
174*b290403dSRicardo Garcia /* Get the rate of the stream. */
175*b290403dSRicardo Garcia float sonicGetRate(sonicStream stream);
176*b290403dSRicardo Garcia /* Set the rate of the stream. */
177*b290403dSRicardo Garcia void sonicSetRate(sonicStream stream, float rate);
178*b290403dSRicardo Garcia /* Get the scaling factor of the stream. */
179*b290403dSRicardo Garcia float sonicGetVolume(sonicStream stream);
180*b290403dSRicardo Garcia /* Set the scaling factor of the stream. */
181*b290403dSRicardo Garcia void sonicSetVolume(sonicStream stream, float volume);
182*b290403dSRicardo Garcia /* Chord pitch is DEPRECATED.  AFAIK, it was never used by anyone.  These
183*b290403dSRicardo Garcia    functions still exist to avoid breaking existing code. */
184*b290403dSRicardo Garcia /* Get the chord pitch setting. */
185*b290403dSRicardo Garcia int sonicGetChordPitch(sonicStream stream);
186*b290403dSRicardo Garcia /* Set chord pitch mode on or off.  Default is off.  See the documentation
187*b290403dSRicardo Garcia    page for a description of this feature. */
188*b290403dSRicardo Garcia void sonicSetChordPitch(sonicStream stream, int useChordPitch);
189*b290403dSRicardo Garcia /* Get the quality setting. */
190*b290403dSRicardo Garcia int sonicGetQuality(sonicStream stream);
191*b290403dSRicardo Garcia /* Set the "quality".  Default 0 is virtually as good as 1, but very much
192*b290403dSRicardo Garcia  * faster. */
193*b290403dSRicardo Garcia void sonicSetQuality(sonicStream stream, int quality);
194*b290403dSRicardo Garcia /* Get the sample rate of the stream. */
195*b290403dSRicardo Garcia int sonicGetSampleRate(sonicStream stream);
196*b290403dSRicardo Garcia /* Set the sample rate of the stream.  This will drop any samples that have not
197*b290403dSRicardo Garcia  * been read. */
198*b290403dSRicardo Garcia void sonicSetSampleRate(sonicStream stream, int sampleRate);
199*b290403dSRicardo Garcia /* Get the number of channels. */
200*b290403dSRicardo Garcia int sonicGetNumChannels(sonicStream stream);
201*b290403dSRicardo Garcia /* Set the number of channels.  This will drop any samples that have not been
202*b290403dSRicardo Garcia  * read. */
203*b290403dSRicardo Garcia void sonicSetNumChannels(sonicStream stream, int numChannels);
204*b290403dSRicardo Garcia /* This is a non-stream oriented interface to just change the speed of a sound
205*b290403dSRicardo Garcia    sample.  It works in-place on the sample array, so there must be at least
206*b290403dSRicardo Garcia    speed*numSamples available space in the array. Returns the new number of
207*b290403dSRicardo Garcia    samples. */
208*b290403dSRicardo Garcia int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
209*b290403dSRicardo Garcia                           float pitch, float rate, float volume,
210*b290403dSRicardo Garcia                           int useChordPitch, int sampleRate, int numChannels);
211*b290403dSRicardo Garcia /* This is a non-stream oriented interface to just change the speed of a sound
212*b290403dSRicardo Garcia    sample.  It works in-place on the sample array, so there must be at least
213*b290403dSRicardo Garcia    speed*numSamples available space in the array. Returns the new number of
214*b290403dSRicardo Garcia    samples. */
215*b290403dSRicardo Garcia int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
216*b290403dSRicardo Garcia                           float pitch, float rate, float volume,
217*b290403dSRicardo Garcia                           int useChordPitch, int sampleRate, int numChannels);
218*b290403dSRicardo Garcia 
219*b290403dSRicardo Garcia #ifdef SONIC_SPECTROGRAM
220*b290403dSRicardo Garcia /*
221*b290403dSRicardo Garcia This code generates high quality spectrograms from sound samples, using
222*b290403dSRicardo Garcia Time-Aliased-FFTs as described at:
223*b290403dSRicardo Garcia 
224*b290403dSRicardo Garcia     https://github.com/waywardgeek/spectrogram
225*b290403dSRicardo Garcia 
226*b290403dSRicardo Garcia Basically, two adjacent pitch periods are overlap-added to create a sound
227*b290403dSRicardo Garcia sample that accurately represents the speech sound at that moment in time.
228*b290403dSRicardo Garcia This set of samples is converted to a spectral line using an FFT, and the result
229*b290403dSRicardo Garcia is saved as a single spectral line at that moment in time.  The resulting
230*b290403dSRicardo Garcia spectral lines vary in resolution (it is equal to the number of samples in the
231*b290403dSRicardo Garcia pitch period), and the spacing of spectral lines also varies (proportional to
232*b290403dSRicardo Garcia the number of samples in the pitch period).
233*b290403dSRicardo Garcia 
234*b290403dSRicardo Garcia To generate a bitmap, linear interpolation is used to render the grayscale
235*b290403dSRicardo Garcia value at any particular point in time and frequency.
236*b290403dSRicardo Garcia */
237*b290403dSRicardo Garcia 
238*b290403dSRicardo Garcia #define SONIC_MAX_SPECTRUM_FREQ 5000
239*b290403dSRicardo Garcia 
240*b290403dSRicardo Garcia struct sonicSpectrogramStruct;
241*b290403dSRicardo Garcia struct sonicBitmapStruct;
242*b290403dSRicardo Garcia typedef struct sonicSpectrogramStruct* sonicSpectrogram;
243*b290403dSRicardo Garcia typedef struct sonicBitmapStruct* sonicBitmap;
244*b290403dSRicardo Garcia 
245*b290403dSRicardo Garcia /* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
246*b290403dSRicardo Garcia    pixel is from 0 (black) to 255 (white).  Bitmaps are rows*cols in size.
247*b290403dSRicardo Garcia    Rows are indexed top to bottom and columns are indexed left to right */
struct sonicBitmapStruct {
  unsigned char* data; /* Grayscale pixel values, 0 (black) to 255 (white);
                          the bitmap holds numRows*numCols of them. */
  int numRows;         /* Number of rows; rows are indexed top to bottom. */
  int numCols;         /* Number of columns; indexed left to right. */
};
/* The sonicBitmap pointer typedef is declared once, next to the forward
   declaration of this struct earlier in the header.  It is deliberately not
   repeated here: redeclaring a typedef is a constraint violation for
   compilers older than C11. */
255*b290403dSRicardo Garcia 
256*b290403dSRicardo Garcia /* Enable computation of a spectrogram on the fly. */
257*b290403dSRicardo Garcia void sonicComputeSpectrogram(sonicStream stream);
258*b290403dSRicardo Garcia 
259*b290403dSRicardo Garcia /* Get the spectrogram. */
260*b290403dSRicardo Garcia sonicSpectrogram sonicGetSpectrogram(sonicStream stream);
261*b290403dSRicardo Garcia 
262*b290403dSRicardo Garcia /* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram
263*b290403dSRicardo Garcia    has been called. */
264*b290403dSRicardo Garcia sonicSpectrogram sonicCreateSpectrogram(int sampleRate);
265*b290403dSRicardo Garcia 
266*b290403dSRicardo Garcia /* Destroy the spectrogram.  This is called automatically when calling
267*b290403dSRicardo Garcia    sonicDestroyStream. */
268*b290403dSRicardo Garcia void sonicDestroySpectrogram(sonicSpectrogram spectrogram);
269*b290403dSRicardo Garcia 
270*b290403dSRicardo Garcia /* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */
271*b290403dSRicardo Garcia sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
272*b290403dSRicardo Garcia                                             int numRows, int numCols);
273*b290403dSRicardo Garcia 
274*b290403dSRicardo Garcia /* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
275*b290403dSRicardo Garcia void sonicDestroyBitmap(sonicBitmap bitmap);
276*b290403dSRicardo Garcia 
277*b290403dSRicardo Garcia int sonicWritePGM(sonicBitmap bitmap, char* fileName);
278*b290403dSRicardo Garcia 
279*b290403dSRicardo Garcia /* Add two pitch periods worth of samples to the spectrogram.  There must be
280*b290403dSRicardo Garcia    2*period samples.  Time should advance one pitch period for each call to
281*b290403dSRicardo Garcia    this function. */
282*b290403dSRicardo Garcia void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
283*b290403dSRicardo Garcia                                       short* samples, int numSamples,
284*b290403dSRicardo Garcia                                       int numChannels);
285*b290403dSRicardo Garcia #endif  /* SONIC_SPECTROGRAM */
286*b290403dSRicardo Garcia 
287*b290403dSRicardo Garcia #ifdef __cplusplus
288*b290403dSRicardo Garcia }
289*b290403dSRicardo Garcia #endif
290*b290403dSRicardo Garcia 
291*b290403dSRicardo Garcia #endif  /* SONIC_H_ */
292