#ifndef SONIC_H_
#define SONIC_H_

/* Sonic library
   Copyright 2010
   Bill Cox
   This file is part of the Sonic Library.

   This file is licensed under the Apache 2.0 license.
*/

/*
The Sonic Library implements a new algorithm invented by Bill Cox for the
specific purpose of speeding up speech by high factors at high quality.  It
generates smooth speech at speed up factors as high as 6X, possibly more.  It is
also capable of slowing down speech, and generates high quality results
regardless of the speed up or slow down factor.  For speeding up speech by 2X or
more, the following equation is used:

    newSamples = period/(speed - 1.0)
    scale = 1.0/newSamples;

where period is the current pitch period, determined using AMDF or any other
pitch estimator, and speed is the speedup factor.  If the current position in
the input stream is pointed to by "samples", and the current output stream
position is pointed to by "out", then newSamples number of samples can be
generated with:

    out[t] = (samples[t]*(newSamples - t) + samples[t + period]*t)/newSamples;

where t = 0 to newSamples - 1.

For speed factors < 2X, the PICOLA algorithm is used.  The above
algorithm is first used to double the speed of one pitch period.  Then, enough
input is directly copied from the input to the output to achieve the desired
speed up factor, where 1.0 < speed < 2.0.  The amount of data copied is derived:

    speed = (2*period + length)/(period + length)
    speed*length + speed*period = 2*period + length
    length(speed - 1) = 2*period - speed*period
    length = period*(2 - speed)/(speed - 1)

For slowing down speech where 0.5 < speed < 1.0, a pitch period is inserted into
the output twice, and length of input is copied from the input to the output
until the output desired speed is reached.  The length of data copied is:

    length = period*(speed - 0.5)/(1 - speed)

For slow down factors below 0.5, no data is copied, and an algorithm
similar to high speed factors is used.
*/

/* Uncomment this to use sine-wave based overlap add which in theory can improve
   sound quality slightly, at the expense of lots of floating point math. */
/* #define SONIC_USE_SIN */

#ifdef __cplusplus
extern "C" {
#endif

#ifdef SONIC_INTERNAL
/* The following #define's are used to change the names of the routines defined
 * here so that a new library (i.e. speedy) can reuse these names, and then call
 * the original names.  We do this for two reasons: 1) we don't want to change
 * the original API, and 2) we want to add a shim, using the original names and
 * still call these routines.
 *
 * Original users of this API and the libsonic library need to do nothing.  The
 * original behavior remains.
 *
 * A new user that adds some additional functionality above this library (a
 * shim) should #define SONIC_INTERNAL before including this file, undefine all
 * these symbols and call the sonicIntXXX functions directly.
 */
#define sonicCreateStream sonicIntCreateStream
#define sonicDestroyStream sonicIntDestroyStream
#define sonicWriteFloatToStream sonicIntWriteFloatToStream
#define sonicWriteShortToStream sonicIntWriteShortToStream
#define sonicWriteUnsignedCharToStream sonicIntWriteUnsignedCharToStream
#define sonicReadFloatFromStream sonicIntReadFloatFromStream
#define sonicReadShortFromStream sonicIntReadShortFromStream
#define sonicReadUnsignedCharFromStream sonicIntReadUnsignedCharFromStream
#define sonicFlushStream sonicIntFlushStream
#define sonicSamplesAvailable sonicIntSamplesAvailable
#define sonicGetSpeed sonicIntGetSpeed
#define sonicSetSpeed sonicIntSetSpeed
#define sonicGetPitch sonicIntGetPitch
#define sonicSetPitch sonicIntSetPitch
#define sonicGetRate sonicIntGetRate
#define sonicSetRate sonicIntSetRate
#define sonicGetVolume sonicIntGetVolume
#define sonicSetVolume sonicIntSetVolume
#define sonicGetQuality sonicIntGetQuality
#define sonicSetQuality sonicIntSetQuality
#define sonicGetSampleRate sonicIntGetSampleRate
#define sonicSetSampleRate sonicIntSetSampleRate
#define sonicGetNumChannels sonicIntGetNumChannels
#define sonicGetUserData sonicIntGetUserData
#define sonicSetUserData sonicIntSetUserData
#define sonicSetNumChannels sonicIntSetNumChannels
#define sonicChangeFloatSpeed sonicIntChangeFloatSpeed
#define sonicChangeShortSpeed sonicIntChangeShortSpeed
#define sonicEnableNonlinearSpeedup sonicIntEnableNonlinearSpeedup
#define sonicSetDurationFeedbackStrength sonicIntSetDurationFeedbackStrength
#define sonicComputeSpectrogram sonicIntComputeSpectrogram
#define sonicGetSpectrogram sonicIntGetSpectrogram

#endif /* SONIC_INTERNAL */

/* This specifies the range of voice pitches we try to match.
   Note that if we go lower than 65, we could overflow in findPitchInRange */
#ifndef SONIC_MIN_PITCH
#define SONIC_MIN_PITCH 65
#endif /* SONIC_MIN_PITCH */
#ifndef SONIC_MAX_PITCH
#define SONIC_MAX_PITCH 400
#endif /* SONIC_MAX_PITCH */

/* These are used to down-sample some inputs to improve speed */
#define SONIC_AMDF_FREQ 4000

/* Opaque stream handle: the struct is only forward-declared here, so callers
   can hold and pass a sonicStream but cannot look inside it. */
struct sonicStreamStruct;
typedef struct sonicStreamStruct* sonicStream;

/* For all of the following functions, numChannels is multiplied by numSamples
   to determine the actual number of values read or returned. */

/* Create a sonic stream.  Return NULL only if we are out of memory and cannot
   allocate the stream. Set numChannels to 1 for mono, and 2 for stereo. */
sonicStream sonicCreateStream(int sampleRate, int numChannels);
/* Destroy the sonic stream. */
void sonicDestroyStream(sonicStream stream);
/* Attach user data to the stream. */
void sonicSetUserData(sonicStream stream, void *userData);
/* Retrieve user data attached to the stream. */
void *sonicGetUserData(sonicStream stream);
/* Use this to write floating point data to be speed up or down into the stream.
   Values must be between -1 and 1.  Return 0 if memory realloc failed,
   otherwise 1 */
int sonicWriteFloatToStream(sonicStream stream, const float* samples, int numSamples);
/* Use this to write 16-bit data to be speed up or down into the stream.
   Return 0 if memory realloc failed, otherwise 1 */
int sonicWriteShortToStream(sonicStream stream, const short* samples, int numSamples);
/* Use this to write 8-bit unsigned data to be speed up or down into the stream.
   Return 0 if memory realloc failed, otherwise 1 */
int sonicWriteUnsignedCharToStream(sonicStream stream, const unsigned char* samples,
                                   int numSamples);
/* Use this to read floating point data out of the stream.  Sometimes no data
   will be available, and zero is returned, which is not an error condition. */
int sonicReadFloatFromStream(sonicStream stream, float* samples,
                             int maxSamples);
/* Use this to read 16-bit data out of the stream.  Sometimes no data will
   be available, and zero is returned, which is not an error condition. */
int sonicReadShortFromStream(sonicStream stream, short* samples,
                             int maxSamples);
/* Use this to read 8-bit unsigned data out of the stream.  Sometimes no data
   will be available, and zero is returned, which is not an error condition. */
int sonicReadUnsignedCharFromStream(sonicStream stream, unsigned char* samples,
                                    int maxSamples);
/* Force the sonic stream to generate output using whatever data it currently
   has.  No extra delay will be added to the output, but flushing in the middle
   of words could introduce distortion. */
int sonicFlushStream(sonicStream stream);
/* Return the number of samples in the output buffer */
int sonicSamplesAvailable(sonicStream stream);
/* Get the speed of the stream. */
float sonicGetSpeed(sonicStream stream);
/* Set the speed of the stream. */
void sonicSetSpeed(sonicStream stream, float speed);
/* Get the pitch of the stream. */
float sonicGetPitch(sonicStream stream);
/* Set the pitch of the stream. */
void sonicSetPitch(sonicStream stream, float pitch);
/* Get the rate of the stream. */
float sonicGetRate(sonicStream stream);
/* Set the rate of the stream. */
void sonicSetRate(sonicStream stream, float rate);
/* Get the scaling factor of the stream. */
float sonicGetVolume(sonicStream stream);
/* Set the scaling factor of the stream. */
void sonicSetVolume(sonicStream stream, float volume);
/* Chord pitch is DEPRECATED.  AFAIK, it was never used by anyone.  These
   functions still exist to avoid breaking existing code. */
/* Get the chord pitch setting. */
int sonicGetChordPitch(sonicStream stream);
/* Set chord pitch mode on or off.  Default is off.  See the documentation
   page for a description of this feature. */
void sonicSetChordPitch(sonicStream stream, int useChordPitch);
/* Get the quality setting. */
int sonicGetQuality(sonicStream stream);
/* Set the "quality".  Default 0 is virtually as good as 1, but very much
 * faster. */
void sonicSetQuality(sonicStream stream, int quality);
/* Get the sample rate of the stream. */
int sonicGetSampleRate(sonicStream stream);
/* Set the sample rate of the stream.  This will drop any samples that have not
 * been read. */
void sonicSetSampleRate(sonicStream stream, int sampleRate);
/* Get the number of channels. */
int sonicGetNumChannels(sonicStream stream);
/* Set the number of channels.  This will drop any samples that have not been
 * read. */
void sonicSetNumChannels(sonicStream stream, int numChannels);
/* This is a non-stream oriented interface to just change the speed of a sound
   sample.  It works in-place on the sample array, so there must be at least
   speed*numSamples available space in the array. Returns the new number of
   samples.
   NOTE(review): slowing down (speed < 1) produces more output than input, so
   the "speed*numSamples" capacity bound above looks suspect for that case --
   confirm the required capacity against the implementation. */
int sonicChangeFloatSpeed(float* samples, int numSamples, float speed,
                          float pitch, float rate, float volume,
                          int useChordPitch, int sampleRate, int numChannels);
/* This is a non-stream oriented interface to just change the speed of a sound
   sample.  It works in-place on the sample array, so there must be at least
   speed*numSamples available space in the array. Returns the new number of
   samples.
   NOTE(review): slowing down (speed < 1) produces more output than input, so
   the "speed*numSamples" capacity bound above looks suspect for that case --
   confirm the required capacity against the implementation. */
int sonicChangeShortSpeed(short* samples, int numSamples, float speed,
                          float pitch, float rate, float volume,
                          int useChordPitch, int sampleRate, int numChannels);

#ifdef SONIC_SPECTROGRAM
/*
This code generates high quality spectrograms from sound samples, using
Time-Aliased-FFTs as described at:

    https://github.com/waywardgeek/spectrogram

Basically, two adjacent pitch periods are overlap-added to create a sound
sample that accurately represents the speech sound at that moment in time.
This set of samples is converted to a spectral line using an FFT, and the result
is saved as a single spectral line at that moment in time.  The resulting
spectral lines vary in resolution (it is equal to the number of samples in the
pitch period), and the spacing of spectral lines also varies (proportional to
the number of samples in the pitch period).

To generate a bitmap, linear interpolation is used to render the grayscale
value at any particular point in time and frequency.
*/

#define SONIC_MAX_SPECTRUM_FREQ 5000

struct sonicSpectrogramStruct;
struct sonicBitmapStruct;
typedef struct sonicSpectrogramStruct* sonicSpectrogram;
/* Declared exactly once: repeating a typedef is a constraint violation before
   C11, so it must not be re-declared after the struct definition below. */
typedef struct sonicBitmapStruct* sonicBitmap;

/* sonicBitmap objects represent spectrograms as grayscale bitmaps where each
   pixel is from 0 (black) to 255 (white).  Bitmaps are rows*cols in size.
   Rows are indexed top to bottom and columns are indexed left to right */
struct sonicBitmapStruct {
  unsigned char* data;
  int numRows;
  int numCols;
};

/* Enable computation of a spectrogram on the fly. */
void sonicComputeSpectrogram(sonicStream stream);

/* Get the spectrogram. */
sonicSpectrogram sonicGetSpectrogram(sonicStream stream);

/* Create an empty spectrogram. Called automatically if sonicComputeSpectrogram
   has been called. */
sonicSpectrogram sonicCreateSpectrogram(int sampleRate);

/* Destroy the spectrogram.  This is called automatically when calling
   sonicDestroyStream. */
void sonicDestroySpectrogram(sonicSpectrogram spectrogram);

/* Convert the spectrogram to a bitmap. Caller must destroy bitmap when done. */
sonicBitmap sonicConvertSpectrogramToBitmap(sonicSpectrogram spectrogram,
                                            int numRows, int numCols);

/* Destroy a bitmap returned by sonicConvertSpectrogramToBitmap. */
void sonicDestroyBitmap(sonicBitmap bitmap);

/* Write the bitmap to the file named fileName (a PGM image, judging by the
   function name).  The meaning of the int return value is not documented in
   this header -- check the implementation before relying on it. */
int sonicWritePGM(sonicBitmap bitmap, char* fileName);

/* Add two pitch periods worth of samples to the spectrogram.  There must be
   2*period samples.  Time should advance one pitch period for each call to
   this function. */
void sonicAddPitchPeriodToSpectrogram(sonicSpectrogram spectrogram,
                                      short* samples, int numSamples,
                                      int numChannels);
#endif /* SONIC_SPECTROGRAM */

#ifdef __cplusplus
}
#endif

#endif /* SONIC_H_ */