1 /* This code is imported several times in lpc_intrin_sse2.c with different 2 * values for MAX_LAG. Comments are for MAX_LAG == 14 */ 3 int i; 4 __m128d sum0, sum1, sum2, sum3; 5 __m128d d0, d1, d2, d3; 6 #if MAX_LAG > 8 7 __m128d d4; 8 __m128d sum4; 9 #endif 10 #if MAX_LAG > 10 11 __m128d d5, d6; 12 __m128d sum5, sum6; 13 #endif 14 15 (void) lag; 16 FLAC__ASSERT(lag <= MAX_LAG); 17 18 /* Initialize all sum vectors with zero */ 19 sum0 = _mm_setzero_pd(); 20 sum1 = _mm_setzero_pd(); 21 sum2 = _mm_setzero_pd(); 22 sum3 = _mm_setzero_pd(); 23 d0 = _mm_setzero_pd(); 24 d1 = _mm_setzero_pd(); 25 d2 = _mm_setzero_pd(); 26 d3 = _mm_setzero_pd(); 27 #if MAX_LAG > 8 28 sum4 = _mm_setzero_pd(); 29 d4 = _mm_setzero_pd(); 30 #endif 31 #if MAX_LAG > 10 32 sum5 = _mm_setzero_pd(); 33 sum6 = _mm_setzero_pd(); 34 d5 = _mm_setzero_pd(); 35 d6 = _mm_setzero_pd(); 36 #endif 37 38 /* Loop backwards through samples from data_len to limit */ 39 for(i = data_len-1; i >= 0; i--) { 40 __m128d d = _mm_set1_pd(data[i]); 41 42 /* The next lines of code work like a queue. For more 43 * information see the lag8 version of this function */ 44 #if MAX_LAG > 10 45 d6 = _mm_shuffle_pd(d5, d6, _MM_SHUFFLE(0,0,0,1)); 46 d5 = _mm_shuffle_pd(d4, d5, _MM_SHUFFLE(0,0,0,1)); 47 #endif 48 #if MAX_LAG > 8 49 d4 = _mm_shuffle_pd(d3, d4, _MM_SHUFFLE(0,0,0,1)); 50 #endif 51 d3 = _mm_shuffle_pd(d2, d3, _MM_SHUFFLE(0,0,0,1)); 52 d2 = _mm_shuffle_pd(d1, d2, _MM_SHUFFLE(0,0,0,1)); 53 d1 = _mm_shuffle_pd(d0, d1, _MM_SHUFFLE(0,0,0,1)); 54 d0 = _mm_shuffle_pd(d, d0, _MM_SHUFFLE(0,0,0,1)); 55 56 /* sumn += d*dn */ 57 sum0 = _mm_add_pd(sum0, _mm_mul_pd(d, d0)); 58 sum1 = _mm_add_pd(sum1, _mm_mul_pd(d, d1)); 59 sum2 = _mm_add_pd(sum2, _mm_mul_pd(d, d2)); 60 sum3 = _mm_add_pd(sum3, _mm_mul_pd(d, d3)); 61 #if MAX_LAG > 8 62 sum4 = _mm_add_pd(sum4, _mm_mul_pd(d, d4)); 63 #endif 64 #if MAX_LAG > 10 65 sum5 = _mm_add_pd(sum5, _mm_mul_pd(d, d5)); 66 sum6 = _mm_add_pd(sum6, _mm_mul_pd(d, d6)); 67 #endif 68 } 69 70 /* Store sum0..sum6 in autoc[0..14] */ 71 _mm_storeu_pd(autoc, sum0); 72 _mm_storeu_pd(autoc+2, sum1); 73 _mm_storeu_pd(autoc+4, sum2); 74 _mm_storeu_pd(autoc+6 ,sum3); 75 #if MAX_LAG > 8 76 _mm_storeu_pd(autoc+8, sum4); 77 #endif 78 #if MAX_LAG > 10 79 _mm_storeu_pd(autoc+10,sum5); 80 _mm_storeu_pd(autoc+12,sum6); 81 #endif 82