xref: /aosp_15_r20/external/sonic/Sonic.java (revision b290403dc9d28f89f133eb7e190ea8185d440ecd)
1 /* Sonic library
2    Copyright 2010, 2011
3    Bill Cox
4    This file is part of the Sonic Library.
5 
6    This file is licensed under the Apache 2.0 license.
7 */
8 
9 package sonic;
10 
11 public class Sonic {
12 
13     private static final int SONIC_MIN_PITCH = 65; // allocateStreamBuffers derives maxPeriod = sampleRate/SONIC_MIN_PITCH
14     private static final int SONIC_MAX_PITCH = 400; // allocateStreamBuffers derives minPeriod = sampleRate/SONIC_MAX_PITCH
15     // This is used to down-sample some inputs to improve speed: when the sample rate exceeds it and quality is 0, findPitchPeriod uses skip = sampleRate/SONIC_AMDF_FREQ.
16     private static final int SONIC_AMDF_FREQ = 4000;
17     // The number of points to use in the sinc FIR filter for resampling.
18     private static final int SINC_FILTER_POINTS = 12;
19     private static final int SINC_TABLE_SIZE = 601; // findSincCoefficient uses (SINC_TABLE_SIZE-1)/SINC_FILTER_POINTS table steps per filter point
20 
21     // Lookup table for windowed sinc function of SINC_FILTER_POINTS points.
22     // The code to generate this is in the header comment of sonic.c.
23     private static final short sincTable[] = { // read by findSincCoefficient, which blends two adjacent entries linearly
24         0, 0, 0, 0, 0, 0, 0, -1, -1, -2, -2, -3, -4, -6, -7, -9, -10, -12, -14,
25         -17, -19, -21, -24, -26, -29, -32, -34, -37, -40, -42, -44, -47, -48, -50,
26         -51, -52, -53, -53, -53, -52, -50, -48, -46, -43, -39, -34, -29, -22, -16,
27         -8, 0, 9, 19, 29, 41, 53, 65, 79, 92, 107, 121, 137, 152, 168, 184, 200,
28         215, 231, 247, 262, 276, 291, 304, 317, 328, 339, 348, 357, 363, 369, 372,
29         374, 375, 373, 369, 363, 355, 345, 332, 318, 300, 281, 259, 234, 208, 178,
30         147, 113, 77, 39, 0, -41, -85, -130, -177, -225, -274, -324, -375, -426,
31         -478, -530, -581, -632, -682, -731, -779, -825, -870, -912, -951, -989,
32         -1023, -1053, -1080, -1104, -1123, -1138, -1149, -1154, -1155, -1151,
33         -1141, -1125, -1105, -1078, -1046, -1007, -963, -913, -857, -796, -728,
34         -655, -576, -492, -403, -309, -210, -107, 0, 111, 225, 342, 462, 584, 708,
35         833, 958, 1084, 1209, 1333, 1455, 1575, 1693, 1807, 1916, 2022, 2122, 2216,
36         2304, 2384, 2457, 2522, 2579, 2625, 2663, 2689, 2706, 2711, 2705, 2687,
37         2657, 2614, 2559, 2491, 2411, 2317, 2211, 2092, 1960, 1815, 1658, 1489,
38         1308, 1115, 912, 698, 474, 241, 0, -249, -506, -769, -1037, -1310, -1586,
39         -1864, -2144, -2424, -2703, -2980, -3254, -3523, -3787, -4043, -4291,
40         -4529, -4757, -4972, -5174, -5360, -5531, -5685, -5819, -5935, -6029,
41         -6101, -6150, -6175, -6175, -6149, -6096, -6015, -5905, -5767, -5599,
42         -5401, -5172, -4912, -4621, -4298, -3944, -3558, -3141, -2693, -2214,
43         -1705, -1166, -597, 0, 625, 1277, 1955, 2658, 3386, 4135, 4906, 5697, 6506,
44         7332, 8173, 9027, 9893, 10769, 11654, 12544, 13439, 14335, 15232, 16128,
45         17019, 17904, 18782, 19649, 20504, 21345, 22170, 22977, 23763, 24527,
46         25268, 25982, 26669, 27327, 27953, 28547, 29107, 29632, 30119, 30569,
47         30979, 31349, 31678, 31964, 32208, 32408, 32565, 32677, 32744, 32767,
48         32744, 32677, 32565, 32408, 32208, 31964, 31678, 31349, 30979, 30569,
49         30119, 29632, 29107, 28547, 27953, 27327, 26669, 25982, 25268, 24527,
50         23763, 22977, 22170, 21345, 20504, 19649, 18782, 17904, 17019, 16128,
51         15232, 14335, 13439, 12544, 11654, 10769, 9893, 9027, 8173, 7332, 6506,
52         5697, 4906, 4135, 3386, 2658, 1955, 1277, 625, 0, -597, -1166, -1705,
53         -2214, -2693, -3141, -3558, -3944, -4298, -4621, -4912, -5172, -5401,
54         -5599, -5767, -5905, -6015, -6096, -6149, -6175, -6175, -6150, -6101,
55         -6029, -5935, -5819, -5685, -5531, -5360, -5174, -4972, -4757, -4529,
56         -4291, -4043, -3787, -3523, -3254, -2980, -2703, -2424, -2144, -1864,
57         -1586, -1310, -1037, -769, -506, -249, 0, 241, 474, 698, 912, 1115, 1308,
58         1489, 1658, 1815, 1960, 2092, 2211, 2317, 2411, 2491, 2559, 2614, 2657,
59         2687, 2705, 2711, 2706, 2689, 2663, 2625, 2579, 2522, 2457, 2384, 2304,
60         2216, 2122, 2022, 1916, 1807, 1693, 1575, 1455, 1333, 1209, 1084, 958, 833,
61         708, 584, 462, 342, 225, 111, 0, -107, -210, -309, -403, -492, -576, -655,
62         -728, -796, -857, -913, -963, -1007, -1046, -1078, -1105, -1125, -1141,
63         -1151, -1155, -1154, -1149, -1138, -1123, -1104, -1080, -1053, -1023, -989,
64         -951, -912, -870, -825, -779, -731, -682, -632, -581, -530, -478, -426,
65         -375, -324, -274, -225, -177, -130, -85, -41, 0, 39, 77, 113, 147, 178,
66         208, 234, 259, 281, 300, 318, 332, 345, 355, 363, 369, 373, 375, 374, 372,
67         369, 363, 357, 348, 339, 328, 317, 304, 291, 276, 262, 247, 231, 215, 200,
68         184, 168, 152, 137, 121, 107, 92, 79, 65, 53, 41, 29, 19, 9, 0, -8, -16,
69         -22, -29, -34, -39, -43, -46, -48, -50, -52, -53, -53, -53, -52, -51, -50,
70         -48, -47, -44, -42, -40, -37, -34, -32, -29, -26, -24, -21, -19, -17, -14,
71         -12, -10, -9, -7, -6, -4, -3, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0
72     };
73 
74     private short inputBuffer[]; // interleaved samples added by the add*ToInputBuffer methods; numInputSamples frames are valid
75     private short outputBuffer[]; // interleaved samples handed out by the read*FromStream methods; numOutputSamples frames are valid
76     private short pitchBuffer[]; // samples moved out of outputBuffer by moveNewSamplesToPitchBuffer, consumed by adjustPitch
77     private short downSampleBuffer[]; // scratch written by downSampleInput; one value per down-sampled frame
78     private float speed; // set to 1.0 by the constructor
79     private float volume; // set to 1.0 by the constructor
80     private float pitch; // set to 1.0 by the constructor
81     private float rate; // set to 1.0 by the constructor
82     private int oldRatePosition; // zeroed by setRate and allocateStreamBuffers
83     private int newRatePosition; // zeroed by setRate and allocateStreamBuffers
84     private boolean useChordPitch;
85     private int quality; // findPitchPeriod only down-samples when this is 0
86     private int numChannels;
87     private int inputBufferSize; // capacity of inputBuffer in frames (resize multiplies by numChannels)
88     private int pitchBufferSize; // capacity of pitchBuffer in frames
89     private int outputBufferSize; // capacity of outputBuffer in frames
90     private int numInputSamples; // frames currently held in inputBuffer
91     private int numOutputSamples; // frames currently held in outputBuffer
92     private int numPitchSamples; // frames currently held in pitchBuffer
93     private int minPeriod; // sampleRate/SONIC_MAX_PITCH
94     private int maxPeriod; // sampleRate/SONIC_MIN_PITCH
95     private int maxRequired; // 2*maxPeriod
96     private int remainingInputToCopy; // decremented by copyInputToOutput as it copies input straight to output
97     private int sampleRate;
98     private int prevPeriod; // period computed by the previous findPitchPeriod call (0 after allocation)
99     private int prevMinDiff; // minDiff recorded by the previous findPitchPeriod call
100     private int minDiff; // written by findPitchPeriodInRange: best diff divided by its period
101     private int maxDiff; // written by findPitchPeriodInRange: worst diff divided by its period
102 
103     // Resize the array.
resize( short[] oldArray, int newLength)104     private short[] resize(
105         short[] oldArray,
106         int newLength)
107     {
108         newLength *= numChannels;
109         short[]        newArray = new short[newLength];
110         int length = oldArray.length <= newLength? oldArray.length : newLength;
111 
112         System.arraycopy(oldArray, 0, newArray, 0, length);
113         return newArray;
114     }
115 
116     // Move samples from one array to another.  May move samples down within an array, but not up.
move( short dest[], int destPos, short source[], int sourcePos, int numSamples)117     private void move(
118         short dest[],
119         int destPos,
120         short source[],
121         int sourcePos,
122         int numSamples)
123     {
124         System.arraycopy(source, sourcePos*numChannels, dest, destPos*numChannels, numSamples*numChannels);
125     }
126 
127     // Scale the samples by the factor.
scaleSamples( short samples[], int position, int numSamples, float volume)128     private void scaleSamples(
129         short samples[],
130         int position,
131         int numSamples,
132         float volume)
133     {
134         // Convert volume to fixed-point, with a 12 bit fraction.
135         int fixedPointVolume = (int)(volume*4096.0f);
136         int start = position*numChannels;
137         int stop = start + numSamples*numChannels;
138 
139         for(int xSample = start; xSample < stop; xSample++) {
140             // Convert back from fixed point to 16-bit integer.
141             int value = (samples[xSample]*fixedPointVolume) >> 12;
142             if(value > 32767) {
143                 value = 32767;
144             } else if(value < -32767) {
145                 value = -32767;
146             }
147             samples[xSample] = (short)value;
148         }
149     }
150 
151     // Get the speed of the stream.
getSpeed()152     public float getSpeed()
153     {
154         return speed;
155     }
156 
157     // Set the speed of the stream.
setSpeed( float speed)158     public void setSpeed(
159         float speed)
160     {
161         this.speed = speed;
162     }
163 
164     // Get the pitch of the stream.
getPitch()165     public float getPitch()
166     {
167         return pitch;
168     }
169 
170     // Set the pitch of the stream.
setPitch( float pitch)171     public void setPitch(
172         float pitch)
173     {
174         this.pitch = pitch;
175     }
176 
177     // Get the rate of the stream.
getRate()178     public float getRate()
179     {
180         return rate;
181     }
182 
183     // Set the playback rate of the stream. This scales pitch and speed at the same time.
setRate( float rate)184     public void setRate(
185         float rate)
186     {
187         this.rate = rate;
188         this.oldRatePosition = 0;
189         this.newRatePosition = 0;
190     }
191 
192     // Get the vocal chord pitch setting.
getChordPitch()193     public boolean getChordPitch()
194     {
195         return useChordPitch;
196     }
197 
198     // Set the vocal chord mode for pitch computation.  Default is off.
setChordPitch( boolean useChordPitch)199     public void setChordPitch(
200         boolean useChordPitch)
201     {
202         this.useChordPitch = useChordPitch;
203     }
204 
205     // Get the quality setting.
getQuality()206     public int getQuality()
207     {
208         return quality;
209     }
210 
211     // Set the "quality".  Default 0 is virtually as good as 1, but very much faster.
setQuality( int quality)212     public void setQuality(
213         int quality)
214     {
215         this.quality = quality;
216     }
217 
218     // Get the scaling factor of the stream.
getVolume()219     public float getVolume()
220     {
221         return volume;
222     }
223 
224     // Set the scaling factor of the stream.
setVolume( float volume)225     public void setVolume(
226         float volume)
227     {
228         this.volume = volume;
229     }
230 
231     // Allocate stream buffers.
allocateStreamBuffers( int sampleRate, int numChannels)232     private void allocateStreamBuffers(
233         int sampleRate,
234         int numChannels)
235     {
236         minPeriod = sampleRate/SONIC_MAX_PITCH;
237         maxPeriod = sampleRate/SONIC_MIN_PITCH;
238         maxRequired = 2*maxPeriod;
239         inputBufferSize = maxRequired;
240         inputBuffer = new short[maxRequired*numChannels];
241         outputBufferSize = maxRequired;
242         outputBuffer = new short[maxRequired*numChannels];
243         pitchBufferSize = maxRequired;
244         pitchBuffer = new short[maxRequired*numChannels];
245         downSampleBuffer = new short[maxRequired];
246         this.sampleRate = sampleRate;
247         this.numChannels = numChannels;
248         oldRatePosition = 0;
249         newRatePosition = 0;
250         prevPeriod = 0;
251     }
252 
253     // Create a sonic stream.
// Allocates the stream buffers for the given format and then applies neutral
// settings: unit speed, pitch, volume and rate, chord pitch off, quality 0.
public Sonic(int sampleRate, int numChannels) {
    allocateStreamBuffers(sampleRate, numChannels);

    this.speed = 1.0f;
    this.pitch = 1.0f;
    this.volume = 1.0f;
    this.rate = 1.0f;

    this.oldRatePosition = 0;
    this.newRatePosition = 0;

    this.useChordPitch = false;
    this.quality = 0;
}
268 
269     // Get the sample rate of the stream.
getSampleRate()270     public int getSampleRate()
271     {
272         return sampleRate;
273     }
274 
275     // Set the sample rate of the stream.  This will cause samples buffered in the stream to be lost.
setSampleRate( int sampleRate)276     public void setSampleRate(
277         int sampleRate)
278     {
279         allocateStreamBuffers(sampleRate, numChannels);
280     }
281 
282     // Get the number of channels.
getNumChannels()283     public int getNumChannels()
284     {
285         return numChannels;
286     }
287 
288     // Set the num channels of the stream.  This will cause samples buffered in the stream to be lost.
setNumChannels( int numChannels)289     public void setNumChannels(
290         int numChannels)
291     {
292         allocateStreamBuffers(sampleRate, numChannels);
293     }
294 
295     // Enlarge the output buffer if needed.
enlargeOutputBufferIfNeeded( int numSamples)296     private void enlargeOutputBufferIfNeeded(
297         int numSamples)
298     {
299         if(numOutputSamples + numSamples > outputBufferSize) {
300             outputBufferSize += (outputBufferSize >> 1) + numSamples;
301             outputBuffer = resize(outputBuffer, outputBufferSize);
302         }
303     }
304 
305     // Enlarge the input buffer if needed.
enlargeInputBufferIfNeeded( int numSamples)306     private void enlargeInputBufferIfNeeded(
307         int numSamples)
308     {
309         if(numInputSamples + numSamples > inputBufferSize) {
310             inputBufferSize += (inputBufferSize >> 1) + numSamples;
311             inputBuffer = resize(inputBuffer, inputBufferSize);
312         }
313     }
314 
315     // Add the input samples to the input buffer.
addFloatSamplesToInputBuffer( float samples[], int numSamples)316     private void addFloatSamplesToInputBuffer(
317         float samples[],
318         int numSamples)
319     {
320         if(numSamples == 0) {
321             return;
322         }
323         enlargeInputBufferIfNeeded(numSamples);
324         int xBuffer = numInputSamples*numChannels;
325         for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
326             inputBuffer[xBuffer++] = (short)(samples[xSample]*32767.0f);
327         }
328         numInputSamples += numSamples;
329     }
330 
331     // Add the input samples to the input buffer.
addShortSamplesToInputBuffer( short samples[], int numSamples)332     private void addShortSamplesToInputBuffer(
333         short samples[],
334         int numSamples)
335     {
336         if(numSamples == 0) {
337             return;
338         }
339         enlargeInputBufferIfNeeded(numSamples);
340         move(inputBuffer, numInputSamples, samples, 0, numSamples);
341         numInputSamples += numSamples;
342     }
343 
344     // Add the input samples to the input buffer.
addUnsignedByteSamplesToInputBuffer( byte samples[], int numSamples)345     private void addUnsignedByteSamplesToInputBuffer(
346         byte samples[],
347         int numSamples)
348     {
349         short sample;
350 
351         enlargeInputBufferIfNeeded(numSamples);
352         int xBuffer = numInputSamples*numChannels;
353         for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
354                 sample = (short)((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed
355             inputBuffer[xBuffer++] = (short) (sample << 8);
356         }
357         numInputSamples += numSamples;
358     }
359 
360     // Add the input samples to the input buffer.  They must be 16-bit little-endian encoded in a byte array.
addBytesToInputBuffer( byte inBuffer[], int numBytes)361     private void addBytesToInputBuffer(
362         byte inBuffer[],
363         int numBytes)
364     {
365             int numSamples = numBytes/(2*numChannels);
366         short sample;
367 
368         enlargeInputBufferIfNeeded(numSamples);
369         int xBuffer = numInputSamples*numChannels;
370         for(int xByte = 0; xByte + 1 < numBytes; xByte += 2) {
371                 sample = (short)((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8));
372             inputBuffer[xBuffer++] = sample;
373         }
374         numInputSamples += numSamples;
375     }
376 
377     // Remove input samples that we have already processed.
removeInputSamples( int position)378     private void removeInputSamples(
379         int position)
380     {
381         int remainingSamples = numInputSamples - position;
382 
383         move(inputBuffer, 0, inputBuffer, position, remainingSamples);
384         numInputSamples = remainingSamples;
385     }
386 
387     // Just copy from the array to the output buffer
copyToOutput( short samples[], int position, int numSamples)388     private void copyToOutput(
389         short samples[],
390         int position,
391         int numSamples)
392     {
393         enlargeOutputBufferIfNeeded(numSamples);
394         move(outputBuffer, numOutputSamples, samples, position, numSamples);
395         numOutputSamples += numSamples;
396     }
397 
398     // Just copy from the input buffer to the output buffer.  Return num samples copied.
copyInputToOutput( int position)399     private int copyInputToOutput(
400         int position)
401     {
402         int numSamples = remainingInputToCopy;
403 
404         if(numSamples > maxRequired) {
405             numSamples = maxRequired;
406         }
407         copyToOutput(inputBuffer, position, numSamples);
408         remainingInputToCopy -= numSamples;
409         return numSamples;
410     }
411 
412     // Read data out of the stream.  Sometimes no data will be available, and zero
413     // is returned, which is not an error condition.
readFloatFromStream( float samples[], int maxSamples)414     public int readFloatFromStream(
415         float samples[],
416         int maxSamples)
417     {
418         int numSamples = numOutputSamples;
419         int remainingSamples = 0;
420 
421         if(numSamples == 0) {
422             return 0;
423         }
424         if(numSamples > maxSamples) {
425             remainingSamples = numSamples - maxSamples;
426             numSamples = maxSamples;
427         }
428         for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
429             samples[xSample] = (outputBuffer[xSample])/32767.0f;
430         }
431         move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
432         numOutputSamples = remainingSamples;
433         return numSamples;
434     }
435 
436     // Read short data out of the stream.  Sometimes no data will be available, and zero
437     // is returned, which is not an error condition.
readShortFromStream( short samples[], int maxSamples)438     public int readShortFromStream(
439         short samples[],
440         int maxSamples)
441     {
442         int numSamples = numOutputSamples;
443         int remainingSamples = 0;
444 
445         if(numSamples == 0) {
446             return 0;
447         }
448         if(numSamples > maxSamples) {
449             remainingSamples = numSamples - maxSamples;
450             numSamples = maxSamples;
451         }
452         move(samples, 0, outputBuffer, 0, numSamples);
453         move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
454         numOutputSamples = remainingSamples;
455         return numSamples;
456     }
457 
458     // Read unsigned byte data out of the stream.  Sometimes no data will be available, and zero
459     // is returned, which is not an error condition.
readUnsignedByteFromStream( byte samples[], int maxSamples)460     public int readUnsignedByteFromStream(
461         byte samples[],
462         int maxSamples)
463     {
464         int numSamples = numOutputSamples;
465         int remainingSamples = 0;
466 
467         if(numSamples == 0) {
468             return 0;
469         }
470         if(numSamples > maxSamples) {
471             remainingSamples = numSamples - maxSamples;
472             numSamples = maxSamples;
473         }
474         for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
475                 samples[xSample] = (byte)((outputBuffer[xSample] >> 8) + 128);
476         }
477         move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
478         numOutputSamples = remainingSamples;
479         return numSamples;
480     }
481 
482     // Read 16-bit little-endian sample bytes out of the stream.  Sometimes no data will be
483     // available, and zero is returned, which is not an error condition.
readBytesFromStream( byte outBuffer[], int maxBytes)484     public int readBytesFromStream(
485         byte outBuffer[],
486         int maxBytes)
487     {
488             int maxSamples = maxBytes/(2*numChannels);
489         int numSamples = numOutputSamples;
490         int remainingSamples = 0;
491 
492         if(numSamples == 0 || maxSamples == 0) {
493             return 0;
494         }
495         if(numSamples > maxSamples) {
496             remainingSamples = numSamples - maxSamples;
497             numSamples = maxSamples;
498         }
499         for(int xSample = 0; xSample < numSamples*numChannels; xSample++) {
500                 short sample = outputBuffer[xSample];
501                 outBuffer[xSample << 1] = (byte)(sample & 0xff);
502                 outBuffer[(xSample << 1) + 1] = (byte)(sample >> 8);
503         }
504         move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples);
505         numOutputSamples = remainingSamples;
506         return 2*numSamples*numChannels;
507     }
508 
509     // Force the sonic stream to generate output using whatever data it currently
510     // has.  No extra delay will be added to the output, but flushing in the middle of
511     // words could introduce distortion.
flushStream()512     public void flushStream()
513     {
514         int remainingSamples = numInputSamples;
515         float s = speed/pitch;
516         float r = rate*pitch;
517         int expectedOutputSamples = numOutputSamples + (int)((remainingSamples/s + numPitchSamples)/r + 0.5f);
518 
519         // Add enough silence to flush both input and pitch buffers.
520         enlargeInputBufferIfNeeded(remainingSamples + 2*maxRequired);
521         for(int xSample = 0; xSample < 2*maxRequired*numChannels; xSample++) {
522             inputBuffer[remainingSamples*numChannels + xSample] = 0;
523         }
524         numInputSamples += 2*maxRequired;
525         writeShortToStream(null, 0);
526         // Throw away any extra samples we generated due to the silence we added.
527         if(numOutputSamples > expectedOutputSamples) {
528             numOutputSamples = expectedOutputSamples;
529         }
530         // Empty input and pitch buffers.
531         numInputSamples = 0;
532         remainingInputToCopy = 0;
533         numPitchSamples = 0;
534     }
535 
536     // Return the number of samples in the output buffer
samplesAvailable()537     public int samplesAvailable()
538     {
539         return numOutputSamples;
540     }
541 
542     // If skip is greater than one, average skip samples together and write them to
543     // the down-sample buffer.  If numChannels is greater than one, mix the channels
544     // together as we down sample.
downSampleInput( short samples[], int position, int skip)545     private void downSampleInput(
546         short samples[],
547         int position,
548         int skip)
549     {
550         int numSamples = maxRequired/skip;
551         int samplesPerValue = numChannels*skip;
552         int value;
553 
554         position *= numChannels;
555         for(int i = 0; i < numSamples; i++) {
556             value = 0;
557             for(int j = 0; j < samplesPerValue; j++) {
558                 value += samples[position + i*samplesPerValue + j];
559             }
560             value /= samplesPerValue;
561             downSampleBuffer[i] = (short)value;
562         }
563     }
564 
565     // Find the best frequency match in the range, and given a sample skip multiple.
566     // For now, just find the pitch of the first channel.
findPitchPeriodInRange( short samples[], int position, int minPeriod, int maxPeriod)567     private int findPitchPeriodInRange(
568         short samples[],
569         int position,
570         int minPeriod,
571         int maxPeriod)
572     {
573         int bestPeriod = 0, worstPeriod = 255;
574         int minDiff = 1, maxDiff = 0;
575 
576         position *= numChannels;
577         for(int period = minPeriod; period <= maxPeriod; period++) {
578             int diff = 0;
579             for(int i = 0; i < period; i++) {
580                 short sVal = samples[position + i];
581                 short pVal = samples[position + period + i];
582                 diff += sVal >= pVal? sVal - pVal : pVal - sVal;
583             }
584             /* Note that the highest number of samples we add into diff will be less
585                than 256, since we skip samples.  Thus, diff is a 24 bit number, and
586                we can safely multiply by numSamples without overflow */
587             if(diff*bestPeriod < minDiff*period) {
588                 minDiff = diff;
589                 bestPeriod = period;
590             }
591             if(diff*worstPeriod > maxDiff*period) {
592                 maxDiff = diff;
593                 worstPeriod = period;
594             }
595         }
596         this.minDiff = minDiff/bestPeriod;
597         this.maxDiff = maxDiff/worstPeriod;
598 
599         return bestPeriod;
600     }
601 
602     // At abrupt ends of voiced words, we can have pitch periods that are better
603     // approximated by the previous pitch period estimate.  Try to detect this case.
prevPeriodBetter( int minDiff, int maxDiff, boolean preferNewPeriod)604     private boolean prevPeriodBetter(
605         int minDiff,
606         int maxDiff,
607         boolean preferNewPeriod)
608     {
609         if(minDiff == 0 || prevPeriod == 0) {
610             return false;
611         }
612         if(preferNewPeriod) {
613             if(maxDiff > minDiff*3) {
614                 // Got a reasonable match this period
615                 return false;
616             }
617             if(minDiff*2 <= prevMinDiff*3) {
618                 // Mismatch is not that much greater this period
619                 return false;
620             }
621         } else {
622             if(minDiff <= prevMinDiff) {
623                 return false;
624             }
625         }
626         return true;
627     }
628 
629     // Find the pitch period.  This is a critical step, and we may have to try
630     // multiple ways to get a good answer.  This version uses AMDF.  To improve
631     // speed, we down sample by an integer factor to get into the SONIC_AMDF_FREQ
632     // range, and then do it again with a narrower frequency range without down sampling.
findPitchPeriod( short samples[], int position, boolean preferNewPeriod)633     private int findPitchPeriod(
634         short samples[],
635         int position,
636         boolean preferNewPeriod)
637     {
638         int period, retPeriod;
639         int skip = 1;
640 
641         if(sampleRate > SONIC_AMDF_FREQ && quality == 0) {
642             skip = sampleRate/SONIC_AMDF_FREQ;
643         }
644         if(numChannels == 1 && skip == 1) {
645             period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod);
646         } else {
647             downSampleInput(samples, position, skip);
648             period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod/skip,
649                 maxPeriod/skip);
650             if(skip != 1) {
651                 period *= skip;
652                 int minP = period - (skip << 2);
653                 int maxP = period + (skip << 2);
654                 if(minP < minPeriod) {
655                     minP = minPeriod;
656                 }
657                 if(maxP > maxPeriod) {
658                     maxP = maxPeriod;
659                 }
660                 if(numChannels == 1) {
661                     period = findPitchPeriodInRange(samples, position, minP, maxP);
662                 } else {
663                     downSampleInput(samples, position, 1);
664                     period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP);
665                 }
666             }
667         }
668         if(prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) {
669             retPeriod = prevPeriod;
670         } else {
671             retPeriod = period;
672         }
673         prevMinDiff = minDiff;
674         prevPeriod = period;
675         return retPeriod;
676     }
677 
678     // Overlap two sound segments, ramp the volume of one down, while ramping the
679     // other one from zero up, and add them, storing the result at the output.
overlapAdd( int numSamples, int numChannels, short out[], int outPos, short rampDown[], int rampDownPos, short rampUp[], int rampUpPos)680     private void overlapAdd(
681         int numSamples,
682         int numChannels,
683         short out[],
684         int outPos,
685         short rampDown[],
686         int rampDownPos,
687         short rampUp[],
688         int rampUpPos)
689     {
690          for(int i = 0; i < numChannels; i++) {
691             int o = outPos*numChannels + i;
692             int u = rampUpPos*numChannels + i;
693             int d = rampDownPos*numChannels + i;
694             for(int t = 0; t < numSamples; t++) {
695                 out[o] = (short)((rampDown[d]*(numSamples - t) + rampUp[u]*t)/numSamples);
696                 o += numChannels;
697                 d += numChannels;
698                 u += numChannels;
699             }
700         }
701     }
702 
703     // Overlap two sound segments, ramp the volume of one down, while ramping the
704     // other one from zero up, and add them, storing the result at the output.
overlapAddWithSeparation( int numSamples, int numChannels, int separation, short out[], int outPos, short rampDown[], int rampDownPos, short rampUp[], int rampUpPos)705     private void overlapAddWithSeparation(
706         int numSamples,
707         int numChannels,
708         int separation,
709         short out[],
710         int outPos,
711         short rampDown[],
712         int rampDownPos,
713         short rampUp[],
714         int rampUpPos)
715     {
716         for(int i = 0; i < numChannels; i++) {
717             int o = outPos*numChannels + i;
718             int u = rampUpPos*numChannels + i;
719             int d = rampDownPos*numChannels + i;
720             for(int t = 0; t < numSamples + separation; t++) {
721                 if(t < separation) {
722                     out[o] = (short)(rampDown[d]*(numSamples - t)/numSamples);
723                     d += numChannels;
724                 } else if(t < numSamples) {
725                     out[o] = (short)((rampDown[d]*(numSamples - t) + rampUp[u]*(t - separation))/numSamples);
726                     d += numChannels;
727                     u += numChannels;
728                 } else {
729                     out[o] = (short)(rampUp[u]*(t - separation)/numSamples);
730                     u += numChannels;
731                 }
732                 o += numChannels;
733             }
734         }
735     }
736 
737     // Just move the new samples in the output buffer to the pitch buffer
moveNewSamplesToPitchBuffer( int originalNumOutputSamples)738     private void moveNewSamplesToPitchBuffer(
739         int originalNumOutputSamples)
740     {
741         int numSamples = numOutputSamples - originalNumOutputSamples;
742 
743         if(numPitchSamples + numSamples > pitchBufferSize) {
744             pitchBufferSize += (pitchBufferSize >> 1) + numSamples;
745             pitchBuffer = resize(pitchBuffer, pitchBufferSize);
746         }
747         move(pitchBuffer, numPitchSamples, outputBuffer, originalNumOutputSamples, numSamples);
748         numOutputSamples = originalNumOutputSamples;
749         numPitchSamples += numSamples;
750     }
751 
752     // Remove processed samples from the pitch buffer.
removePitchSamples( int numSamples)753     private void removePitchSamples(
754         int numSamples)
755     {
756         if(numSamples == 0) {
757             return;
758         }
759         move(pitchBuffer, 0, pitchBuffer, numSamples, numPitchSamples - numSamples);
760         numPitchSamples -= numSamples;
761     }
762 
763     // Change the pitch.  The latency this introduces could be reduced by looking at
764     // past samples to determine pitch, rather than future.
adjustPitch( int originalNumOutputSamples)765     private void adjustPitch(
766         int originalNumOutputSamples)
767     {
768         int period, newPeriod, separation;
769         int position = 0;
770 
771         if(numOutputSamples == originalNumOutputSamples) {
772             return;
773         }
774         moveNewSamplesToPitchBuffer(originalNumOutputSamples);
775         while(numPitchSamples - position >= maxRequired) {
776             period = findPitchPeriod(pitchBuffer, position, false);
777             newPeriod = (int)(period/pitch);
778             enlargeOutputBufferIfNeeded(newPeriod);
779             if(pitch >= 1.0f) {
780                 overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer,
781                         position, pitchBuffer, position + period - newPeriod);
782             } else {
783                 separation = newPeriod - period;
784                 overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples,
785                         pitchBuffer, position, pitchBuffer, position);
786             }
787             numOutputSamples += newPeriod;
788             position += period;
789         }
790         removePitchSamples(position);
791     }
792 
793     // Approximate the sinc function times a Hann window from the sinc table.
findSincCoefficient(int i, int ratio, int width)794     private int findSincCoefficient(int i, int ratio, int width) {
795         int lobePoints = (SINC_TABLE_SIZE-1)/SINC_FILTER_POINTS;
796         int left = i*lobePoints + (ratio*lobePoints)/width;
797         int right = left + 1;
798         int position = i*lobePoints*width + ratio*lobePoints - left*width;
799         int leftVal = sincTable[left];
800         int rightVal = sincTable[right];
801 
802         return ((leftVal*(width - position) + rightVal*position) << 1)/width;
803     }
804 
805     // Return 1 if value >= 0, else -1.  This represents the sign of value.
getSign(int value)806     private int getSign(int value) {
807         return value >= 0? 1 : -1;
808     }
809 
810     // Interpolate the new output sample.
interpolate( short in[], int inPos, int oldSampleRate, int newSampleRate)811     private short interpolate(
812         short in[],
813         int inPos,  // Index to first sample which already includes channel offset.
814         int oldSampleRate,
815         int newSampleRate)
816     {
817         // Compute N-point sinc FIR-filter here.  Clip rather than overflow.
818         int i;
819         int total = 0;
820         int position = newRatePosition*oldSampleRate;
821         int leftPosition = oldRatePosition*newSampleRate;
822         int rightPosition = (oldRatePosition + 1)*newSampleRate;
823         int ratio = rightPosition - position - 1;
824         int width = rightPosition - leftPosition;
825         int weight, value;
826         int oldSign;
827         int overflowCount = 0;
828 
829         for (i = 0; i < SINC_FILTER_POINTS; i++) {
830             weight = findSincCoefficient(i, ratio, width);
831             /* printf("%u %f\n", i, weight); */
832             value = in[inPos + i*numChannels]*weight;
833             oldSign = getSign(total);
834             total += value;
835             if (oldSign != getSign(total) && getSign(value) == oldSign) {
836                 /* We must have overflowed.  This can happen with a sinc filter. */
837                 overflowCount += oldSign;
838             }
839         }
840         /* It is better to clip than to wrap if there was a overflow. */
841         if (overflowCount > 0) {
842             return Short.MAX_VALUE;
843         } else if (overflowCount < 0) {
844             return Short.MIN_VALUE;
845         }
846         return (short)(total >> 16);
847     }
848 
849     // Change the rate.
adjustRate( float rate, int originalNumOutputSamples)850     private void adjustRate(
851         float rate,
852         int originalNumOutputSamples)
853     {
854         int newSampleRate = (int)(sampleRate/rate);
855         int oldSampleRate = sampleRate;
856         int position;
857         int N = SINC_FILTER_POINTS;
858 
859         // Set these values to help with the integer math
860         while(newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) {
861             newSampleRate >>= 1;
862             oldSampleRate >>= 1;
863         }
864         if(numOutputSamples == originalNumOutputSamples) {
865             return;
866         }
867         moveNewSamplesToPitchBuffer(originalNumOutputSamples);
868         // Leave at least N pitch samples in the buffer
869         for(position = 0; position < numPitchSamples - N; position++) {
870             while((oldRatePosition + 1)*newSampleRate > newRatePosition*oldSampleRate) {
871                 enlargeOutputBufferIfNeeded(1);
872                 for(int i = 0; i < numChannels; i++) {
873                     outputBuffer[numOutputSamples*numChannels + i] = interpolate(pitchBuffer,
874                             position*numChannels + i, oldSampleRate, newSampleRate);
875                 }
876                 newRatePosition++;
877                 numOutputSamples++;
878             }
879             oldRatePosition++;
880             if(oldRatePosition == oldSampleRate) {
881                 oldRatePosition = 0;
882                 if(newRatePosition != newSampleRate) {
883                     System.out.printf("Assertion failed: newRatePosition != newSampleRate\n");
884                     assert false;
885                 }
886                 newRatePosition = 0;
887             }
888         }
889         removePitchSamples(position);
890     }
891 
892 
893     // Skip over a pitch period, and copy period/speed samples to the output
skipPitchPeriod( short samples[], int position, float speed, int period)894     private int skipPitchPeriod(
895         short samples[],
896         int position,
897         float speed,
898         int period)
899     {
900         int newSamples;
901 
902         if(speed >= 2.0f) {
903             newSamples = (int)(period/(speed - 1.0f));
904         } else {
905             newSamples = period;
906             remainingInputToCopy = (int)(period*(2.0f - speed)/(speed - 1.0f));
907         }
908         enlargeOutputBufferIfNeeded(newSamples);
909         overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position,
910                 samples, position + period);
911         numOutputSamples += newSamples;
912         return newSamples;
913     }
914 
915     // Insert a pitch period, and determine how much input to copy directly.
insertPitchPeriod( short samples[], int position, float speed, int period)916     private int insertPitchPeriod(
917         short samples[],
918         int position,
919         float speed,
920         int period)
921     {
922         int newSamples;
923 
924         if(speed < 0.5f) {
925             newSamples = (int)(period*speed/(1.0f - speed));
926         } else {
927             newSamples = period;
928             remainingInputToCopy = (int)(period*(2.0f*speed - 1.0f)/(1.0f - speed));
929         }
930         enlargeOutputBufferIfNeeded(period + newSamples);
931         move(outputBuffer, numOutputSamples, samples, position, period);
932         overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples,
933                 position + period, samples, position);
934         numOutputSamples += period + newSamples;
935         return newSamples;
936     }
937 
938     // Resample as many pitch periods as we have buffered on the input.  Return 0 if
939     // we fail to resize an input or output buffer.  Also scale the output by the volume.
changeSpeed( float speed)940     private void changeSpeed(
941         float speed)
942     {
943         int numSamples = numInputSamples;
944         int position = 0, period, newSamples;
945 
946         if(numInputSamples < maxRequired) {
947             return;
948         }
949         do {
950             if(remainingInputToCopy > 0) {
951                 newSamples = copyInputToOutput(position);
952                 position += newSamples;
953             } else {
954                 period = findPitchPeriod(inputBuffer, position, true);
955                 if(speed > 1.0) {
956                     newSamples = skipPitchPeriod(inputBuffer, position, speed, period);
957                     position += period + newSamples;
958                 } else {
959                     newSamples = insertPitchPeriod(inputBuffer, position, speed, period);
960                     position += newSamples;
961                 }
962             }
963         } while(position + maxRequired <= numSamples);
964         removeInputSamples(position);
965     }
966 
967     // Resample as many pitch periods as we have buffered on the input.  Scale the output by the volume.
processStreamInput()968     private void processStreamInput()
969     {
970         int originalNumOutputSamples = numOutputSamples;
971         float s = speed/pitch;
972         float r = rate;
973 
974         if(!useChordPitch) {
975             r *= pitch;
976         }
977         if(s > 1.00001 || s < 0.99999) {
978             changeSpeed(s);
979         } else {
980             copyToOutput(inputBuffer, 0, numInputSamples);
981             numInputSamples = 0;
982         }
983         if(useChordPitch) {
984             if(pitch != 1.0f) {
985                 adjustPitch(originalNumOutputSamples);
986             }
987         } else if(r != 1.0f) {
988             adjustRate(r, originalNumOutputSamples);
989         }
990         if(volume != 1.0f) {
991             // Adjust output volume.
992             scaleSamples(outputBuffer, originalNumOutputSamples, numOutputSamples - originalNumOutputSamples,
993                 volume);
994         }
995     }
996 
997     // Write floating point data to the input buffer and process it.
writeFloatToStream( float samples[], int numSamples)998     public void writeFloatToStream(
999         float samples[],
1000         int numSamples)
1001     {
1002         addFloatSamplesToInputBuffer(samples, numSamples);
1003         processStreamInput();
1004     }
1005 
1006     // Write the data to the input stream, and process it.
writeShortToStream( short samples[], int numSamples)1007     public void writeShortToStream(
1008         short samples[],
1009         int numSamples)
1010     {
1011         addShortSamplesToInputBuffer(samples, numSamples);
1012         processStreamInput();
1013     }
1014 
1015     // Simple wrapper around sonicWriteFloatToStream that does the unsigned byte to short
1016     // conversion for you.
writeUnsignedByteToStream( byte samples[], int numSamples)1017     public void writeUnsignedByteToStream(
1018         byte samples[],
1019         int numSamples)
1020     {
1021         addUnsignedByteSamplesToInputBuffer(samples, numSamples);
1022         processStreamInput();
1023     }
1024 
1025     // Simple wrapper around sonicWriteBytesToStream that does the byte to 16-bit LE conversion.
writeBytesToStream( byte inBuffer[], int numBytes)1026     public void writeBytesToStream(
1027         byte inBuffer[],
1028         int numBytes)
1029     {
1030         addBytesToInputBuffer(inBuffer, numBytes);
1031         processStreamInput();
1032     }
1033 
1034     // This is a non-stream oriented interface to just change the speed of a sound sample
changeFloatSpeed( float samples[], int numSamples, float speed, float pitch, float rate, float volume, boolean useChordPitch, int sampleRate, int numChannels)1035     public static int changeFloatSpeed(
1036         float samples[],
1037         int numSamples,
1038         float speed,
1039         float pitch,
1040         float rate,
1041         float volume,
1042         boolean useChordPitch,
1043         int sampleRate,
1044         int numChannels)
1045     {
1046         Sonic stream = new Sonic(sampleRate, numChannels);
1047 
1048         stream.setSpeed(speed);
1049         stream.setPitch(pitch);
1050         stream.setRate(rate);
1051         stream.setVolume(volume);
1052         stream.setChordPitch(useChordPitch);
1053         stream.writeFloatToStream(samples, numSamples);
1054         stream.flushStream();
1055         numSamples = stream.samplesAvailable();
1056         stream.readFloatFromStream(samples, numSamples);
1057         return numSamples;
1058     }
1059 
1060     /* This is a non-stream oriented interface to just change the speed of a sound sample */
sonicChangeShortSpeed( short samples[], int numSamples, float speed, float pitch, float rate, float volume, boolean useChordPitch, int sampleRate, int numChannels)1061     public int sonicChangeShortSpeed(
1062         short samples[],
1063         int numSamples,
1064         float speed,
1065         float pitch,
1066         float rate,
1067         float volume,
1068         boolean useChordPitch,
1069         int sampleRate,
1070         int numChannels)
1071     {
1072         Sonic stream = new Sonic(sampleRate, numChannels);
1073 
1074         stream.setSpeed(speed);
1075         stream.setPitch(pitch);
1076         stream.setRate(rate);
1077         stream.setVolume(volume);
1078         stream.setChordPitch(useChordPitch);
1079         stream.writeShortToStream(samples, numSamples);
1080         stream.flushStream();
1081         numSamples = stream.samplesAvailable();
1082         stream.readShortFromStream(samples, numSamples);
1083         return numSamples;
1084     }
1085 }
1086