1 /* Sonic library 2 Copyright 2010, 2011 3 Bill Cox 4 This file is part of the Sonic Library. 5 6 This file is licensed under the Apache 2.0 license. 7 */ 8 9 package sonic; 10 11 public class Sonic { 12 13 private static final int SONIC_MIN_PITCH = 65; 14 private static final int SONIC_MAX_PITCH = 400; 15 // This is used to down-sample some inputs to improve speed 16 private static final int SONIC_AMDF_FREQ = 4000; 17 // The number of points to use in the sinc FIR filter for resampling. 18 private static final int SINC_FILTER_POINTS = 12; 19 private static final int SINC_TABLE_SIZE = 601; 20 21 // Lookup table for windowed sinc function of SINC_FILTER_POINTS points. 22 // The code to generate this is in the header comment of sonic.c. 23 private static final short sincTable[] = { 24 0, 0, 0, 0, 0, 0, 0, -1, -1, -2, -2, -3, -4, -6, -7, -9, -10, -12, -14, 25 -17, -19, -21, -24, -26, -29, -32, -34, -37, -40, -42, -44, -47, -48, -50, 26 -51, -52, -53, -53, -53, -52, -50, -48, -46, -43, -39, -34, -29, -22, -16, 27 -8, 0, 9, 19, 29, 41, 53, 65, 79, 92, 107, 121, 137, 152, 168, 184, 200, 28 215, 231, 247, 262, 276, 291, 304, 317, 328, 339, 348, 357, 363, 369, 372, 29 374, 375, 373, 369, 363, 355, 345, 332, 318, 300, 281, 259, 234, 208, 178, 30 147, 113, 77, 39, 0, -41, -85, -130, -177, -225, -274, -324, -375, -426, 31 -478, -530, -581, -632, -682, -731, -779, -825, -870, -912, -951, -989, 32 -1023, -1053, -1080, -1104, -1123, -1138, -1149, -1154, -1155, -1151, 33 -1141, -1125, -1105, -1078, -1046, -1007, -963, -913, -857, -796, -728, 34 -655, -576, -492, -403, -309, -210, -107, 0, 111, 225, 342, 462, 584, 708, 35 833, 958, 1084, 1209, 1333, 1455, 1575, 1693, 1807, 1916, 2022, 2122, 2216, 36 2304, 2384, 2457, 2522, 2579, 2625, 2663, 2689, 2706, 2711, 2705, 2687, 37 2657, 2614, 2559, 2491, 2411, 2317, 2211, 2092, 1960, 1815, 1658, 1489, 38 1308, 1115, 912, 698, 474, 241, 0, -249, -506, -769, -1037, -1310, -1586, 39 -1864, -2144, -2424, -2703, -2980, -3254, -3523, -3787, 
-4043, -4291, 40 -4529, -4757, -4972, -5174, -5360, -5531, -5685, -5819, -5935, -6029, 41 -6101, -6150, -6175, -6175, -6149, -6096, -6015, -5905, -5767, -5599, 42 -5401, -5172, -4912, -4621, -4298, -3944, -3558, -3141, -2693, -2214, 43 -1705, -1166, -597, 0, 625, 1277, 1955, 2658, 3386, 4135, 4906, 5697, 6506, 44 7332, 8173, 9027, 9893, 10769, 11654, 12544, 13439, 14335, 15232, 16128, 45 17019, 17904, 18782, 19649, 20504, 21345, 22170, 22977, 23763, 24527, 46 25268, 25982, 26669, 27327, 27953, 28547, 29107, 29632, 30119, 30569, 47 30979, 31349, 31678, 31964, 32208, 32408, 32565, 32677, 32744, 32767, 48 32744, 32677, 32565, 32408, 32208, 31964, 31678, 31349, 30979, 30569, 49 30119, 29632, 29107, 28547, 27953, 27327, 26669, 25982, 25268, 24527, 50 23763, 22977, 22170, 21345, 20504, 19649, 18782, 17904, 17019, 16128, 51 15232, 14335, 13439, 12544, 11654, 10769, 9893, 9027, 8173, 7332, 6506, 52 5697, 4906, 4135, 3386, 2658, 1955, 1277, 625, 0, -597, -1166, -1705, 53 -2214, -2693, -3141, -3558, -3944, -4298, -4621, -4912, -5172, -5401, 54 -5599, -5767, -5905, -6015, -6096, -6149, -6175, -6175, -6150, -6101, 55 -6029, -5935, -5819, -5685, -5531, -5360, -5174, -4972, -4757, -4529, 56 -4291, -4043, -3787, -3523, -3254, -2980, -2703, -2424, -2144, -1864, 57 -1586, -1310, -1037, -769, -506, -249, 0, 241, 474, 698, 912, 1115, 1308, 58 1489, 1658, 1815, 1960, 2092, 2211, 2317, 2411, 2491, 2559, 2614, 2657, 59 2687, 2705, 2711, 2706, 2689, 2663, 2625, 2579, 2522, 2457, 2384, 2304, 60 2216, 2122, 2022, 1916, 1807, 1693, 1575, 1455, 1333, 1209, 1084, 958, 833, 61 708, 584, 462, 342, 225, 111, 0, -107, -210, -309, -403, -492, -576, -655, 62 -728, -796, -857, -913, -963, -1007, -1046, -1078, -1105, -1125, -1141, 63 -1151, -1155, -1154, -1149, -1138, -1123, -1104, -1080, -1053, -1023, -989, 64 -951, -912, -870, -825, -779, -731, -682, -632, -581, -530, -478, -426, 65 -375, -324, -274, -225, -177, -130, -85, -41, 0, 39, 77, 113, 147, 178, 66 208, 234, 259, 281, 300, 318, 332, 345, 
355, 363, 369, 373, 375, 374, 372, 67 369, 363, 357, 348, 339, 328, 317, 304, 291, 276, 262, 247, 231, 215, 200, 68 184, 168, 152, 137, 121, 107, 92, 79, 65, 53, 41, 29, 19, 9, 0, -8, -16, 69 -22, -29, -34, -39, -43, -46, -48, -50, -52, -53, -53, -53, -52, -51, -50, 70 -48, -47, -44, -42, -40, -37, -34, -32, -29, -26, -24, -21, -19, -17, -14, 71 -12, -10, -9, -7, -6, -4, -3, -2, -2, -1, -1, 0, 0, 0, 0, 0, 0, 0 72 }; 73 74 private short inputBuffer[]; 75 private short outputBuffer[]; 76 private short pitchBuffer[]; 77 private short downSampleBuffer[]; 78 private float speed; 79 private float volume; 80 private float pitch; 81 private float rate; 82 private int oldRatePosition; 83 private int newRatePosition; 84 private boolean useChordPitch; 85 private int quality; 86 private int numChannels; 87 private int inputBufferSize; 88 private int pitchBufferSize; 89 private int outputBufferSize; 90 private int numInputSamples; 91 private int numOutputSamples; 92 private int numPitchSamples; 93 private int minPeriod; 94 private int maxPeriod; 95 private int maxRequired; 96 private int remainingInputToCopy; 97 private int sampleRate; 98 private int prevPeriod; 99 private int prevMinDiff; 100 private int minDiff; 101 private int maxDiff; 102 103 // Resize the array. resize( short[] oldArray, int newLength)104 private short[] resize( 105 short[] oldArray, 106 int newLength) 107 { 108 newLength *= numChannels; 109 short[] newArray = new short[newLength]; 110 int length = oldArray.length <= newLength? oldArray.length : newLength; 111 112 System.arraycopy(oldArray, 0, newArray, 0, length); 113 return newArray; 114 } 115 116 // Move samples from one array to another. May move samples down within an array, but not up. 
move( short dest[], int destPos, short source[], int sourcePos, int numSamples)117 private void move( 118 short dest[], 119 int destPos, 120 short source[], 121 int sourcePos, 122 int numSamples) 123 { 124 System.arraycopy(source, sourcePos*numChannels, dest, destPos*numChannels, numSamples*numChannels); 125 } 126 127 // Scale the samples by the factor. scaleSamples( short samples[], int position, int numSamples, float volume)128 private void scaleSamples( 129 short samples[], 130 int position, 131 int numSamples, 132 float volume) 133 { 134 // Convert volume to fixed-point, with a 12 bit fraction. 135 int fixedPointVolume = (int)(volume*4096.0f); 136 int start = position*numChannels; 137 int stop = start + numSamples*numChannels; 138 139 for(int xSample = start; xSample < stop; xSample++) { 140 // Convert back from fixed point to 16-bit integer. 141 int value = (samples[xSample]*fixedPointVolume) >> 12; 142 if(value > 32767) { 143 value = 32767; 144 } else if(value < -32767) { 145 value = -32767; 146 } 147 samples[xSample] = (short)value; 148 } 149 } 150 151 // Get the speed of the stream. getSpeed()152 public float getSpeed() 153 { 154 return speed; 155 } 156 157 // Set the speed of the stream. setSpeed( float speed)158 public void setSpeed( 159 float speed) 160 { 161 this.speed = speed; 162 } 163 164 // Get the pitch of the stream. getPitch()165 public float getPitch() 166 { 167 return pitch; 168 } 169 170 // Set the pitch of the stream. setPitch( float pitch)171 public void setPitch( 172 float pitch) 173 { 174 this.pitch = pitch; 175 } 176 177 // Get the rate of the stream. getRate()178 public float getRate() 179 { 180 return rate; 181 } 182 183 // Set the playback rate of the stream. This scales pitch and speed at the same time. setRate( float rate)184 public void setRate( 185 float rate) 186 { 187 this.rate = rate; 188 this.oldRatePosition = 0; 189 this.newRatePosition = 0; 190 } 191 192 // Get the vocal chord pitch setting. 
getChordPitch()193 public boolean getChordPitch() 194 { 195 return useChordPitch; 196 } 197 198 // Set the vocal chord mode for pitch computation. Default is off. setChordPitch( boolean useChordPitch)199 public void setChordPitch( 200 boolean useChordPitch) 201 { 202 this.useChordPitch = useChordPitch; 203 } 204 205 // Get the quality setting. getQuality()206 public int getQuality() 207 { 208 return quality; 209 } 210 211 // Set the "quality". Default 0 is virtually as good as 1, but very much faster. setQuality( int quality)212 public void setQuality( 213 int quality) 214 { 215 this.quality = quality; 216 } 217 218 // Get the scaling factor of the stream. getVolume()219 public float getVolume() 220 { 221 return volume; 222 } 223 224 // Set the scaling factor of the stream. setVolume( float volume)225 public void setVolume( 226 float volume) 227 { 228 this.volume = volume; 229 } 230 231 // Allocate stream buffers. allocateStreamBuffers( int sampleRate, int numChannels)232 private void allocateStreamBuffers( 233 int sampleRate, 234 int numChannels) 235 { 236 minPeriod = sampleRate/SONIC_MAX_PITCH; 237 maxPeriod = sampleRate/SONIC_MIN_PITCH; 238 maxRequired = 2*maxPeriod; 239 inputBufferSize = maxRequired; 240 inputBuffer = new short[maxRequired*numChannels]; 241 outputBufferSize = maxRequired; 242 outputBuffer = new short[maxRequired*numChannels]; 243 pitchBufferSize = maxRequired; 244 pitchBuffer = new short[maxRequired*numChannels]; 245 downSampleBuffer = new short[maxRequired]; 246 this.sampleRate = sampleRate; 247 this.numChannels = numChannels; 248 oldRatePosition = 0; 249 newRatePosition = 0; 250 prevPeriod = 0; 251 } 252 253 // Create a sonic stream. 
Sonic( int sampleRate, int numChannels)254 public Sonic( 255 int sampleRate, 256 int numChannels) 257 { 258 allocateStreamBuffers(sampleRate, numChannels); 259 speed = 1.0f; 260 pitch = 1.0f; 261 volume = 1.0f; 262 rate = 1.0f; 263 oldRatePosition = 0; 264 newRatePosition = 0; 265 useChordPitch = false; 266 quality = 0; 267 } 268 269 // Get the sample rate of the stream. getSampleRate()270 public int getSampleRate() 271 { 272 return sampleRate; 273 } 274 275 // Set the sample rate of the stream. This will cause samples buffered in the stream to be lost. setSampleRate( int sampleRate)276 public void setSampleRate( 277 int sampleRate) 278 { 279 allocateStreamBuffers(sampleRate, numChannels); 280 } 281 282 // Get the number of channels. getNumChannels()283 public int getNumChannels() 284 { 285 return numChannels; 286 } 287 288 // Set the num channels of the stream. This will cause samples buffered in the stream to be lost. setNumChannels( int numChannels)289 public void setNumChannels( 290 int numChannels) 291 { 292 allocateStreamBuffers(sampleRate, numChannels); 293 } 294 295 // Enlarge the output buffer if needed. enlargeOutputBufferIfNeeded( int numSamples)296 private void enlargeOutputBufferIfNeeded( 297 int numSamples) 298 { 299 if(numOutputSamples + numSamples > outputBufferSize) { 300 outputBufferSize += (outputBufferSize >> 1) + numSamples; 301 outputBuffer = resize(outputBuffer, outputBufferSize); 302 } 303 } 304 305 // Enlarge the input buffer if needed. enlargeInputBufferIfNeeded( int numSamples)306 private void enlargeInputBufferIfNeeded( 307 int numSamples) 308 { 309 if(numInputSamples + numSamples > inputBufferSize) { 310 inputBufferSize += (inputBufferSize >> 1) + numSamples; 311 inputBuffer = resize(inputBuffer, inputBufferSize); 312 } 313 } 314 315 // Add the input samples to the input buffer. 
addFloatSamplesToInputBuffer( float samples[], int numSamples)316 private void addFloatSamplesToInputBuffer( 317 float samples[], 318 int numSamples) 319 { 320 if(numSamples == 0) { 321 return; 322 } 323 enlargeInputBufferIfNeeded(numSamples); 324 int xBuffer = numInputSamples*numChannels; 325 for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { 326 inputBuffer[xBuffer++] = (short)(samples[xSample]*32767.0f); 327 } 328 numInputSamples += numSamples; 329 } 330 331 // Add the input samples to the input buffer. addShortSamplesToInputBuffer( short samples[], int numSamples)332 private void addShortSamplesToInputBuffer( 333 short samples[], 334 int numSamples) 335 { 336 if(numSamples == 0) { 337 return; 338 } 339 enlargeInputBufferIfNeeded(numSamples); 340 move(inputBuffer, numInputSamples, samples, 0, numSamples); 341 numInputSamples += numSamples; 342 } 343 344 // Add the input samples to the input buffer. addUnsignedByteSamplesToInputBuffer( byte samples[], int numSamples)345 private void addUnsignedByteSamplesToInputBuffer( 346 byte samples[], 347 int numSamples) 348 { 349 short sample; 350 351 enlargeInputBufferIfNeeded(numSamples); 352 int xBuffer = numInputSamples*numChannels; 353 for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { 354 sample = (short)((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed 355 inputBuffer[xBuffer++] = (short) (sample << 8); 356 } 357 numInputSamples += numSamples; 358 } 359 360 // Add the input samples to the input buffer. They must be 16-bit little-endian encoded in a byte array. 
addBytesToInputBuffer( byte inBuffer[], int numBytes)361 private void addBytesToInputBuffer( 362 byte inBuffer[], 363 int numBytes) 364 { 365 int numSamples = numBytes/(2*numChannels); 366 short sample; 367 368 enlargeInputBufferIfNeeded(numSamples); 369 int xBuffer = numInputSamples*numChannels; 370 for(int xByte = 0; xByte + 1 < numBytes; xByte += 2) { 371 sample = (short)((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8)); 372 inputBuffer[xBuffer++] = sample; 373 } 374 numInputSamples += numSamples; 375 } 376 377 // Remove input samples that we have already processed. removeInputSamples( int position)378 private void removeInputSamples( 379 int position) 380 { 381 int remainingSamples = numInputSamples - position; 382 383 move(inputBuffer, 0, inputBuffer, position, remainingSamples); 384 numInputSamples = remainingSamples; 385 } 386 387 // Just copy from the array to the output buffer copyToOutput( short samples[], int position, int numSamples)388 private void copyToOutput( 389 short samples[], 390 int position, 391 int numSamples) 392 { 393 enlargeOutputBufferIfNeeded(numSamples); 394 move(outputBuffer, numOutputSamples, samples, position, numSamples); 395 numOutputSamples += numSamples; 396 } 397 398 // Just copy from the input buffer to the output buffer. Return num samples copied. copyInputToOutput( int position)399 private int copyInputToOutput( 400 int position) 401 { 402 int numSamples = remainingInputToCopy; 403 404 if(numSamples > maxRequired) { 405 numSamples = maxRequired; 406 } 407 copyToOutput(inputBuffer, position, numSamples); 408 remainingInputToCopy -= numSamples; 409 return numSamples; 410 } 411 412 // Read data out of the stream. Sometimes no data will be available, and zero 413 // is returned, which is not an error condition. 
readFloatFromStream( float samples[], int maxSamples)414 public int readFloatFromStream( 415 float samples[], 416 int maxSamples) 417 { 418 int numSamples = numOutputSamples; 419 int remainingSamples = 0; 420 421 if(numSamples == 0) { 422 return 0; 423 } 424 if(numSamples > maxSamples) { 425 remainingSamples = numSamples - maxSamples; 426 numSamples = maxSamples; 427 } 428 for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { 429 samples[xSample] = (outputBuffer[xSample])/32767.0f; 430 } 431 move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); 432 numOutputSamples = remainingSamples; 433 return numSamples; 434 } 435 436 // Read short data out of the stream. Sometimes no data will be available, and zero 437 // is returned, which is not an error condition. readShortFromStream( short samples[], int maxSamples)438 public int readShortFromStream( 439 short samples[], 440 int maxSamples) 441 { 442 int numSamples = numOutputSamples; 443 int remainingSamples = 0; 444 445 if(numSamples == 0) { 446 return 0; 447 } 448 if(numSamples > maxSamples) { 449 remainingSamples = numSamples - maxSamples; 450 numSamples = maxSamples; 451 } 452 move(samples, 0, outputBuffer, 0, numSamples); 453 move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); 454 numOutputSamples = remainingSamples; 455 return numSamples; 456 } 457 458 // Read unsigned byte data out of the stream. Sometimes no data will be available, and zero 459 // is returned, which is not an error condition. 
readUnsignedByteFromStream( byte samples[], int maxSamples)460 public int readUnsignedByteFromStream( 461 byte samples[], 462 int maxSamples) 463 { 464 int numSamples = numOutputSamples; 465 int remainingSamples = 0; 466 467 if(numSamples == 0) { 468 return 0; 469 } 470 if(numSamples > maxSamples) { 471 remainingSamples = numSamples - maxSamples; 472 numSamples = maxSamples; 473 } 474 for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { 475 samples[xSample] = (byte)((outputBuffer[xSample] >> 8) + 128); 476 } 477 move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); 478 numOutputSamples = remainingSamples; 479 return numSamples; 480 } 481 482 // Read unsigned byte data out of the stream. Sometimes no data will be available, and zero 483 // is returned, which is not an error condition. readBytesFromStream( byte outBuffer[], int maxBytes)484 public int readBytesFromStream( 485 byte outBuffer[], 486 int maxBytes) 487 { 488 int maxSamples = maxBytes/(2*numChannels); 489 int numSamples = numOutputSamples; 490 int remainingSamples = 0; 491 492 if(numSamples == 0 || maxSamples == 0) { 493 return 0; 494 } 495 if(numSamples > maxSamples) { 496 remainingSamples = numSamples - maxSamples; 497 numSamples = maxSamples; 498 } 499 for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { 500 short sample = outputBuffer[xSample]; 501 outBuffer[xSample << 1] = (byte)(sample & 0xff); 502 outBuffer[(xSample << 1) + 1] = (byte)(sample >> 8); 503 } 504 move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); 505 numOutputSamples = remainingSamples; 506 return 2*numSamples*numChannels; 507 } 508 509 // Force the sonic stream to generate output using whatever data it currently 510 // has. No extra delay will be added to the output, but flushing in the middle of 511 // words could introduce distortion. 
flushStream()512 public void flushStream() 513 { 514 int remainingSamples = numInputSamples; 515 float s = speed/pitch; 516 float r = rate*pitch; 517 int expectedOutputSamples = numOutputSamples + (int)((remainingSamples/s + numPitchSamples)/r + 0.5f); 518 519 // Add enough silence to flush both input and pitch buffers. 520 enlargeInputBufferIfNeeded(remainingSamples + 2*maxRequired); 521 for(int xSample = 0; xSample < 2*maxRequired*numChannels; xSample++) { 522 inputBuffer[remainingSamples*numChannels + xSample] = 0; 523 } 524 numInputSamples += 2*maxRequired; 525 writeShortToStream(null, 0); 526 // Throw away any extra samples we generated due to the silence we added. 527 if(numOutputSamples > expectedOutputSamples) { 528 numOutputSamples = expectedOutputSamples; 529 } 530 // Empty input and pitch buffers. 531 numInputSamples = 0; 532 remainingInputToCopy = 0; 533 numPitchSamples = 0; 534 } 535 536 // Return the number of samples in the output buffer samplesAvailable()537 public int samplesAvailable() 538 { 539 return numOutputSamples; 540 } 541 542 // If skip is greater than one, average skip samples together and write them to 543 // the down-sample buffer. If numChannels is greater than one, mix the channels 544 // together as we down sample. downSampleInput( short samples[], int position, int skip)545 private void downSampleInput( 546 short samples[], 547 int position, 548 int skip) 549 { 550 int numSamples = maxRequired/skip; 551 int samplesPerValue = numChannels*skip; 552 int value; 553 554 position *= numChannels; 555 for(int i = 0; i < numSamples; i++) { 556 value = 0; 557 for(int j = 0; j < samplesPerValue; j++) { 558 value += samples[position + i*samplesPerValue + j]; 559 } 560 value /= samplesPerValue; 561 downSampleBuffer[i] = (short)value; 562 } 563 } 564 565 // Find the best frequency match in the range, and given a sample skip multiple. 566 // For now, just find the pitch of the first channel. 
findPitchPeriodInRange( short samples[], int position, int minPeriod, int maxPeriod)567 private int findPitchPeriodInRange( 568 short samples[], 569 int position, 570 int minPeriod, 571 int maxPeriod) 572 { 573 int bestPeriod = 0, worstPeriod = 255; 574 int minDiff = 1, maxDiff = 0; 575 576 position *= numChannels; 577 for(int period = minPeriod; period <= maxPeriod; period++) { 578 int diff = 0; 579 for(int i = 0; i < period; i++) { 580 short sVal = samples[position + i]; 581 short pVal = samples[position + period + i]; 582 diff += sVal >= pVal? sVal - pVal : pVal - sVal; 583 } 584 /* Note that the highest number of samples we add into diff will be less 585 than 256, since we skip samples. Thus, diff is a 24 bit number, and 586 we can safely multiply by numSamples without overflow */ 587 if(diff*bestPeriod < minDiff*period) { 588 minDiff = diff; 589 bestPeriod = period; 590 } 591 if(diff*worstPeriod > maxDiff*period) { 592 maxDiff = diff; 593 worstPeriod = period; 594 } 595 } 596 this.minDiff = minDiff/bestPeriod; 597 this.maxDiff = maxDiff/worstPeriod; 598 599 return bestPeriod; 600 } 601 602 // At abrupt ends of voiced words, we can have pitch periods that are better 603 // approximated by the previous pitch period estimate. Try to detect this case. prevPeriodBetter( int minDiff, int maxDiff, boolean preferNewPeriod)604 private boolean prevPeriodBetter( 605 int minDiff, 606 int maxDiff, 607 boolean preferNewPeriod) 608 { 609 if(minDiff == 0 || prevPeriod == 0) { 610 return false; 611 } 612 if(preferNewPeriod) { 613 if(maxDiff > minDiff*3) { 614 // Got a reasonable match this period 615 return false; 616 } 617 if(minDiff*2 <= prevMinDiff*3) { 618 // Mismatch is not that much greater this period 619 return false; 620 } 621 } else { 622 if(minDiff <= prevMinDiff) { 623 return false; 624 } 625 } 626 return true; 627 } 628 629 // Find the pitch period. This is a critical step, and we may have to try 630 // multiple ways to get a good answer. This version uses AMDF. 
To improve 631 // speed, we down sample by an integer factor get in the 11KHz range, and then 632 // do it again with a narrower frequency range without down sampling findPitchPeriod( short samples[], int position, boolean preferNewPeriod)633 private int findPitchPeriod( 634 short samples[], 635 int position, 636 boolean preferNewPeriod) 637 { 638 int period, retPeriod; 639 int skip = 1; 640 641 if(sampleRate > SONIC_AMDF_FREQ && quality == 0) { 642 skip = sampleRate/SONIC_AMDF_FREQ; 643 } 644 if(numChannels == 1 && skip == 1) { 645 period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod); 646 } else { 647 downSampleInput(samples, position, skip); 648 period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod/skip, 649 maxPeriod/skip); 650 if(skip != 1) { 651 period *= skip; 652 int minP = period - (skip << 2); 653 int maxP = period + (skip << 2); 654 if(minP < minPeriod) { 655 minP = minPeriod; 656 } 657 if(maxP > maxPeriod) { 658 maxP = maxPeriod; 659 } 660 if(numChannels == 1) { 661 period = findPitchPeriodInRange(samples, position, minP, maxP); 662 } else { 663 downSampleInput(samples, position, 1); 664 period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP); 665 } 666 } 667 } 668 if(prevPeriodBetter(minDiff, maxDiff, preferNewPeriod)) { 669 retPeriod = prevPeriod; 670 } else { 671 retPeriod = period; 672 } 673 prevMinDiff = minDiff; 674 prevPeriod = period; 675 return retPeriod; 676 } 677 678 // Overlap two sound segments, ramp the volume of one down, while ramping the 679 // other one from zero up, and add them, storing the result at the output. 
overlapAdd( int numSamples, int numChannels, short out[], int outPos, short rampDown[], int rampDownPos, short rampUp[], int rampUpPos)680 private void overlapAdd( 681 int numSamples, 682 int numChannels, 683 short out[], 684 int outPos, 685 short rampDown[], 686 int rampDownPos, 687 short rampUp[], 688 int rampUpPos) 689 { 690 for(int i = 0; i < numChannels; i++) { 691 int o = outPos*numChannels + i; 692 int u = rampUpPos*numChannels + i; 693 int d = rampDownPos*numChannels + i; 694 for(int t = 0; t < numSamples; t++) { 695 out[o] = (short)((rampDown[d]*(numSamples - t) + rampUp[u]*t)/numSamples); 696 o += numChannels; 697 d += numChannels; 698 u += numChannels; 699 } 700 } 701 } 702 703 // Overlap two sound segments, ramp the volume of one down, while ramping the 704 // other one from zero up, and add them, storing the result at the output. overlapAddWithSeparation( int numSamples, int numChannels, int separation, short out[], int outPos, short rampDown[], int rampDownPos, short rampUp[], int rampUpPos)705 private void overlapAddWithSeparation( 706 int numSamples, 707 int numChannels, 708 int separation, 709 short out[], 710 int outPos, 711 short rampDown[], 712 int rampDownPos, 713 short rampUp[], 714 int rampUpPos) 715 { 716 for(int i = 0; i < numChannels; i++) { 717 int o = outPos*numChannels + i; 718 int u = rampUpPos*numChannels + i; 719 int d = rampDownPos*numChannels + i; 720 for(int t = 0; t < numSamples + separation; t++) { 721 if(t < separation) { 722 out[o] = (short)(rampDown[d]*(numSamples - t)/numSamples); 723 d += numChannels; 724 } else if(t < numSamples) { 725 out[o] = (short)((rampDown[d]*(numSamples - t) + rampUp[u]*(t - separation))/numSamples); 726 d += numChannels; 727 u += numChannels; 728 } else { 729 out[o] = (short)(rampUp[u]*(t - separation)/numSamples); 730 u += numChannels; 731 } 732 o += numChannels; 733 } 734 } 735 } 736 737 // Just move the new samples in the output buffer to the pitch buffer moveNewSamplesToPitchBuffer( int 
originalNumOutputSamples)738 private void moveNewSamplesToPitchBuffer( 739 int originalNumOutputSamples) 740 { 741 int numSamples = numOutputSamples - originalNumOutputSamples; 742 743 if(numPitchSamples + numSamples > pitchBufferSize) { 744 pitchBufferSize += (pitchBufferSize >> 1) + numSamples; 745 pitchBuffer = resize(pitchBuffer, pitchBufferSize); 746 } 747 move(pitchBuffer, numPitchSamples, outputBuffer, originalNumOutputSamples, numSamples); 748 numOutputSamples = originalNumOutputSamples; 749 numPitchSamples += numSamples; 750 } 751 752 // Remove processed samples from the pitch buffer. removePitchSamples( int numSamples)753 private void removePitchSamples( 754 int numSamples) 755 { 756 if(numSamples == 0) { 757 return; 758 } 759 move(pitchBuffer, 0, pitchBuffer, numSamples, numPitchSamples - numSamples); 760 numPitchSamples -= numSamples; 761 } 762 763 // Change the pitch. The latency this introduces could be reduced by looking at 764 // past samples to determine pitch, rather than future. 
adjustPitch( int originalNumOutputSamples)765 private void adjustPitch( 766 int originalNumOutputSamples) 767 { 768 int period, newPeriod, separation; 769 int position = 0; 770 771 if(numOutputSamples == originalNumOutputSamples) { 772 return; 773 } 774 moveNewSamplesToPitchBuffer(originalNumOutputSamples); 775 while(numPitchSamples - position >= maxRequired) { 776 period = findPitchPeriod(pitchBuffer, position, false); 777 newPeriod = (int)(period/pitch); 778 enlargeOutputBufferIfNeeded(newPeriod); 779 if(pitch >= 1.0f) { 780 overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer, 781 position, pitchBuffer, position + period - newPeriod); 782 } else { 783 separation = newPeriod - period; 784 overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples, 785 pitchBuffer, position, pitchBuffer, position); 786 } 787 numOutputSamples += newPeriod; 788 position += period; 789 } 790 removePitchSamples(position); 791 } 792 793 // Approximate the sinc function times a Hann window from the sinc table. findSincCoefficient(int i, int ratio, int width)794 private int findSincCoefficient(int i, int ratio, int width) { 795 int lobePoints = (SINC_TABLE_SIZE-1)/SINC_FILTER_POINTS; 796 int left = i*lobePoints + (ratio*lobePoints)/width; 797 int right = left + 1; 798 int position = i*lobePoints*width + ratio*lobePoints - left*width; 799 int leftVal = sincTable[left]; 800 int rightVal = sincTable[right]; 801 802 return ((leftVal*(width - position) + rightVal*position) << 1)/width; 803 } 804 805 // Return 1 if value >= 0, else -1. This represents the sign of value. getSign(int value)806 private int getSign(int value) { 807 return value >= 0? 1 : -1; 808 } 809 810 // Interpolate the new output sample. interpolate( short in[], int inPos, int oldSampleRate, int newSampleRate)811 private short interpolate( 812 short in[], 813 int inPos, // Index to first sample which already includes channel offset. 
814 int oldSampleRate, 815 int newSampleRate) 816 { 817 // Compute N-point sinc FIR-filter here. Clip rather than overflow. 818 int i; 819 int total = 0; 820 int position = newRatePosition*oldSampleRate; 821 int leftPosition = oldRatePosition*newSampleRate; 822 int rightPosition = (oldRatePosition + 1)*newSampleRate; 823 int ratio = rightPosition - position - 1; 824 int width = rightPosition - leftPosition; 825 int weight, value; 826 int oldSign; 827 int overflowCount = 0; 828 829 for (i = 0; i < SINC_FILTER_POINTS; i++) { 830 weight = findSincCoefficient(i, ratio, width); 831 /* printf("%u %f\n", i, weight); */ 832 value = in[inPos + i*numChannels]*weight; 833 oldSign = getSign(total); 834 total += value; 835 if (oldSign != getSign(total) && getSign(value) == oldSign) { 836 /* We must have overflowed. This can happen with a sinc filter. */ 837 overflowCount += oldSign; 838 } 839 } 840 /* It is better to clip than to wrap if there was a overflow. */ 841 if (overflowCount > 0) { 842 return Short.MAX_VALUE; 843 } else if (overflowCount < 0) { 844 return Short.MIN_VALUE; 845 } 846 return (short)(total >> 16); 847 } 848 849 // Change the rate. 
adjustRate( float rate, int originalNumOutputSamples)850 private void adjustRate( 851 float rate, 852 int originalNumOutputSamples) 853 { 854 int newSampleRate = (int)(sampleRate/rate); 855 int oldSampleRate = sampleRate; 856 int position; 857 int N = SINC_FILTER_POINTS; 858 859 // Set these values to help with the integer math 860 while(newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) { 861 newSampleRate >>= 1; 862 oldSampleRate >>= 1; 863 } 864 if(numOutputSamples == originalNumOutputSamples) { 865 return; 866 } 867 moveNewSamplesToPitchBuffer(originalNumOutputSamples); 868 // Leave at least N pitch samples in the buffer 869 for(position = 0; position < numPitchSamples - N; position++) { 870 while((oldRatePosition + 1)*newSampleRate > newRatePosition*oldSampleRate) { 871 enlargeOutputBufferIfNeeded(1); 872 for(int i = 0; i < numChannels; i++) { 873 outputBuffer[numOutputSamples*numChannels + i] = interpolate(pitchBuffer, 874 position*numChannels + i, oldSampleRate, newSampleRate); 875 } 876 newRatePosition++; 877 numOutputSamples++; 878 } 879 oldRatePosition++; 880 if(oldRatePosition == oldSampleRate) { 881 oldRatePosition = 0; 882 if(newRatePosition != newSampleRate) { 883 System.out.printf("Assertion failed: newRatePosition != newSampleRate\n"); 884 assert false; 885 } 886 newRatePosition = 0; 887 } 888 } 889 removePitchSamples(position); 890 } 891 892 893 // Skip over a pitch period, and copy period/speed samples to the output skipPitchPeriod( short samples[], int position, float speed, int period)894 private int skipPitchPeriod( 895 short samples[], 896 int position, 897 float speed, 898 int period) 899 { 900 int newSamples; 901 902 if(speed >= 2.0f) { 903 newSamples = (int)(period/(speed - 1.0f)); 904 } else { 905 newSamples = period; 906 remainingInputToCopy = (int)(period*(2.0f - speed)/(speed - 1.0f)); 907 } 908 enlargeOutputBufferIfNeeded(newSamples); 909 overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position, 910 
samples, position + period); 911 numOutputSamples += newSamples; 912 return newSamples; 913 } 914 915 // Insert a pitch period, and determine how much input to copy directly. insertPitchPeriod( short samples[], int position, float speed, int period)916 private int insertPitchPeriod( 917 short samples[], 918 int position, 919 float speed, 920 int period) 921 { 922 int newSamples; 923 924 if(speed < 0.5f) { 925 newSamples = (int)(period*speed/(1.0f - speed)); 926 } else { 927 newSamples = period; 928 remainingInputToCopy = (int)(period*(2.0f*speed - 1.0f)/(1.0f - speed)); 929 } 930 enlargeOutputBufferIfNeeded(period + newSamples); 931 move(outputBuffer, numOutputSamples, samples, position, period); 932 overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples, 933 position + period, samples, position); 934 numOutputSamples += period + newSamples; 935 return newSamples; 936 } 937 938 // Resample as many pitch periods as we have buffered on the input. Return 0 if 939 // we fail to resize an input or output buffer. Also scale the output by the volume. changeSpeed( float speed)940 private void changeSpeed( 941 float speed) 942 { 943 int numSamples = numInputSamples; 944 int position = 0, period, newSamples; 945 946 if(numInputSamples < maxRequired) { 947 return; 948 } 949 do { 950 if(remainingInputToCopy > 0) { 951 newSamples = copyInputToOutput(position); 952 position += newSamples; 953 } else { 954 period = findPitchPeriod(inputBuffer, position, true); 955 if(speed > 1.0) { 956 newSamples = skipPitchPeriod(inputBuffer, position, speed, period); 957 position += period + newSamples; 958 } else { 959 newSamples = insertPitchPeriod(inputBuffer, position, speed, period); 960 position += newSamples; 961 } 962 } 963 } while(position + maxRequired <= numSamples); 964 removeInputSamples(position); 965 } 966 967 // Resample as many pitch periods as we have buffered on the input. Scale the output by the volume. 
processStreamInput()968 private void processStreamInput() 969 { 970 int originalNumOutputSamples = numOutputSamples; 971 float s = speed/pitch; 972 float r = rate; 973 974 if(!useChordPitch) { 975 r *= pitch; 976 } 977 if(s > 1.00001 || s < 0.99999) { 978 changeSpeed(s); 979 } else { 980 copyToOutput(inputBuffer, 0, numInputSamples); 981 numInputSamples = 0; 982 } 983 if(useChordPitch) { 984 if(pitch != 1.0f) { 985 adjustPitch(originalNumOutputSamples); 986 } 987 } else if(r != 1.0f) { 988 adjustRate(r, originalNumOutputSamples); 989 } 990 if(volume != 1.0f) { 991 // Adjust output volume. 992 scaleSamples(outputBuffer, originalNumOutputSamples, numOutputSamples - originalNumOutputSamples, 993 volume); 994 } 995 } 996 997 // Write floating point data to the input buffer and process it. writeFloatToStream( float samples[], int numSamples)998 public void writeFloatToStream( 999 float samples[], 1000 int numSamples) 1001 { 1002 addFloatSamplesToInputBuffer(samples, numSamples); 1003 processStreamInput(); 1004 } 1005 1006 // Write the data to the input stream, and process it. writeShortToStream( short samples[], int numSamples)1007 public void writeShortToStream( 1008 short samples[], 1009 int numSamples) 1010 { 1011 addShortSamplesToInputBuffer(samples, numSamples); 1012 processStreamInput(); 1013 } 1014 1015 // Simple wrapper around sonicWriteFloatToStream that does the unsigned byte to short 1016 // conversion for you. writeUnsignedByteToStream( byte samples[], int numSamples)1017 public void writeUnsignedByteToStream( 1018 byte samples[], 1019 int numSamples) 1020 { 1021 addUnsignedByteSamplesToInputBuffer(samples, numSamples); 1022 processStreamInput(); 1023 } 1024 1025 // Simple wrapper around sonicWriteBytesToStream that does the byte to 16-bit LE conversion. 
public void writeBytesToStream(byte inBuffer[], int numBytes)
{
    addBytesToInputBuffer(inBuffer, numBytes);
    processStreamInput();
}

// Non-streaming convenience interface for floating point samples: builds a
// temporary Sonic stream with the given settings, pushes all of `samples`
// through it, flushes it, and reads the result back into `samples`.
// Returns the number of samples the stream reported as available.
// NOTE(review): the result is written into the caller's array; presumably it
// must be large enough for the output when the sound gets longer - confirm
// against readFloatFromStream.
public static int changeFloatSpeed(
    float samples[], int numSamples,
    float speed, float pitch, float rate, float volume,
    boolean useChordPitch, int sampleRate, int numChannels)
{
    Sonic sonic = new Sonic(sampleRate, numChannels);

    sonic.setSpeed(speed);
    sonic.setPitch(pitch);
    sonic.setRate(rate);
    sonic.setVolume(volume);
    sonic.setChordPitch(useChordPitch);

    sonic.writeFloatToStream(samples, numSamples);
    sonic.flushStream();

    int numProcessed = sonic.samplesAvailable();
    sonic.readFloatFromStream(samples, numProcessed);
    return numProcessed;
}

// Non-streaming convenience interface for 16-bit samples: same steps as
// changeFloatSpeed, but using writeShortToStream and readShortFromStream.
// Unlike changeFloatSpeed this is declared as an instance method, although it
// only works on the temporary stream it creates.
// Returns the number of samples the stream reported as available.
// NOTE(review): the result is written into the caller's array; presumably it
// must be large enough for the output when the sound gets longer - confirm
// against readShortFromStream.
public int sonicChangeShortSpeed(
    short samples[], int numSamples,
    float speed, float pitch, float rate, float volume,
    boolean useChordPitch, int sampleRate, int numChannels)
{
    Sonic sonic = new Sonic(sampleRate, numChannels);

    sonic.setSpeed(speed);
    sonic.setPitch(pitch);
    sonic.setRate(rate);
    sonic.setVolume(volume);
    sonic.setChordPitch(useChordPitch);

    sonic.writeShortToStream(samples, numSamples);
    sonic.flushStream();

    int numProcessed = sonic.samplesAvailable();
    sonic.readShortFromStream(samples, numProcessed);
    return numProcessed;
}
// End of class Sonic.
}