xref: /aosp_15_r20/external/pffft/bench_mixers.c (revision 3f1979aa0d7ad34fcf3763de7b7b8f8cd67e5bdd)
1*3f1979aaSAndroid Build Coastguard Worker /*
2*3f1979aaSAndroid Build Coastguard Worker   Copyright (c) 2020  Hayati Ayguen ( [email protected] )
3*3f1979aaSAndroid Build Coastguard Worker 
4*3f1979aaSAndroid Build Coastguard Worker   bench for mixer algorithm/implementations
5*3f1979aaSAndroid Build Coastguard Worker 
6*3f1979aaSAndroid Build Coastguard Worker  */
7*3f1979aaSAndroid Build Coastguard Worker 
8*3f1979aaSAndroid Build Coastguard Worker #include <pf_mixer.h>
9*3f1979aaSAndroid Build Coastguard Worker 
10*3f1979aaSAndroid Build Coastguard Worker #include <math.h>
11*3f1979aaSAndroid Build Coastguard Worker #include <stdio.h>
12*3f1979aaSAndroid Build Coastguard Worker #include <stdlib.h>
13*3f1979aaSAndroid Build Coastguard Worker #include <time.h>
14*3f1979aaSAndroid Build Coastguard Worker #include <assert.h>
15*3f1979aaSAndroid Build Coastguard Worker #include <string.h>
16*3f1979aaSAndroid Build Coastguard Worker 
17*3f1979aaSAndroid Build Coastguard Worker #define HAVE_SYS_TIMES
18*3f1979aaSAndroid Build Coastguard Worker 
19*3f1979aaSAndroid Build Coastguard Worker #ifdef HAVE_SYS_TIMES
20*3f1979aaSAndroid Build Coastguard Worker #  include <sys/times.h>
21*3f1979aaSAndroid Build Coastguard Worker #  include <unistd.h>
22*3f1979aaSAndroid Build Coastguard Worker #endif
23*3f1979aaSAndroid Build Coastguard Worker 
24*3f1979aaSAndroid Build Coastguard Worker #define BENCH_REF_TRIG_FUNC       1
25*3f1979aaSAndroid Build Coastguard Worker #define BENCH_OUT_OF_PLACE_ALGOS  0
26*3f1979aaSAndroid Build Coastguard Worker #define BENCH_INPLACE_ALGOS       1
27*3f1979aaSAndroid Build Coastguard Worker 
28*3f1979aaSAndroid Build Coastguard Worker #define SAVE_BY_DEFAULT  0
29*3f1979aaSAndroid Build Coastguard Worker #define SAVE_LIMIT_MSPS           16
30*3f1979aaSAndroid Build Coastguard Worker 
31*3f1979aaSAndroid Build Coastguard Worker #if 0
32*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_SHIFT_MATH_CC           "/home/ayguen/WindowsDesktop/mixer_test/A_shift_math_cc.bin"
33*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_ADD_FAST_CC             "/home/ayguen/WindowsDesktop/mixer_test/C_shift_addfast_cc.bin"
34*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_ADD_FAST_INP_C          "/home/ayguen/WindowsDesktop/mixer_test/C_shift_addfast_inp_c.bin"
35*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_UNROLL_INP_C            "/home/ayguen/WindowsDesktop/mixer_test/D_shift_unroll_inp_c.bin"
36*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_INP_C        "/home/ayguen/WindowsDesktop/mixer_test/E_shift_limited_unroll_inp_c.bin"
37*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_A_SSE_INP_C  "/home/ayguen/WindowsDesktop/mixer_test/F_shift_limited_unroll_A_sse_inp_c.bin"
38*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_B_SSE_INP_C  "/home/ayguen/WindowsDesktop/mixer_test/G_shift_limited_unroll_B_sse_inp_c.bin"
39*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_C_SSE_INP_C  "/home/ayguen/WindowsDesktop/mixer_test/H_shift_limited_unroll_C_sse_inp_c.bin"
40*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_REC_OSC_CC              ""
41*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_REC_OSC_INP_C           "/home/ayguen/WindowsDesktop/mixer_test/I_shift_recursive_osc_inp_c.bin"
42*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_REC_OSC_SSE_INP_C       "/home/ayguen/WindowsDesktop/mixer_test/J_shift_recursive_osc_sse_inp_c.bin"
43*3f1979aaSAndroid Build Coastguard Worker #else
44*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_SHIFT_MATH_CC           ""
45*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_ADD_FAST_CC             ""
46*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_ADD_FAST_INP_C          ""
47*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_UNROLL_INP_C            ""
48*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_INP_C        ""
49*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_A_SSE_INP_C  ""
50*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_B_SSE_INP_C  ""
51*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_LTD_UNROLL_C_SSE_INP_C  ""
52*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_REC_OSC_CC              ""
53*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_REC_OSC_INP_C           ""
54*3f1979aaSAndroid Build Coastguard Worker   #define BENCH_FILE_REC_OSC_SSE_INP_C       ""
55*3f1979aaSAndroid Build Coastguard Worker #endif
56*3f1979aaSAndroid Build Coastguard Worker 
57*3f1979aaSAndroid Build Coastguard Worker 
58*3f1979aaSAndroid Build Coastguard Worker 
#if defined(HAVE_SYS_TIMES)
    /* Clock ticks per second from sysconf(_SC_CLK_TCK).
     * 0 means "not queried yet", a negative value means "query failed". */
    static double ttclk = 0.;

    /*
     * Return the user CPU time consumed by this process, in seconds.
     *
     * find_start: when non-zero, busy-wait until the user-time counter
     *             advances, so that a measurement starts right on a tick
     *             boundary of the (coarse) times() clock.
     *
     * If the tick rate cannot be determined, the function falls back to
     * clock() / CLOCKS_PER_SEC instead of dividing by an invalid rate.
     */
    static double uclock_sec(int find_start)
    {
        struct tms start, now;
        if (ttclk == 0.)
        {
            ttclk = sysconf(_SC_CLK_TCK);
            fprintf(stderr, "sysconf(_SC_CLK_TCK) => %f\n", ttclk);
            if (ttclk <= 0.)
                ttclk = -1.;  /* remember the failure; do not query again */
        }
        if (ttclk < 0.)
            return (double)clock() / (double)CLOCKS_PER_SEC;

        times(&now);
        if (find_start)
        {
            start = now;
            /* spin until the user-time counter changes */
            while (start.tms_utime == now.tms_utime)
                times(&now);
        }
        /* use only the user time of this process - not realtime, which depends on OS-scheduler .. */
        return ((double)now.tms_utime) / ttclk;
    }

#elif 0
    // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocesstimes
    /* Disabled Windows variant: user time of the process via GetProcessTimes().
     * find_start is not used by this variant. */
    static double uclock_sec(int find_start)
    {
        FILETIME a, b, c, d;
        (void)find_start;
        if (GetProcessTimes(GetCurrentProcess(), &a, &b, &c, &d) != 0)
        {
            //  Returns total user time.
            //  Can be tweaked to include kernel times as well.
            return
                (double)(d.dwLowDateTime |
                    ((unsigned long long)d.dwHighDateTime << 32)) * 0.0000001;
        }
        else {
            //  Handle error
            return 0;
        }
    }

#else
    /* Portable fallback: processor time from clock(), in seconds.
     * find_start is not used by this variant. */
    static double uclock_sec(int find_start)
    {
        (void)find_start;
        return (double)clock()/(double)CLOCKS_PER_SEC;
    }
#endif
104*3f1979aaSAndroid Build Coastguard Worker 
105*3f1979aaSAndroid Build Coastguard Worker 
/*
 * Write processed samples to a binary file, one block at a time.
 *
 * d  : sample buffer
 * B  : block size in samples; only whole blocks are written
 * N  : number of valid samples in d; clamped to SAVE_LIMIT_MSPS * 1024 * 1024
 * fn : output file name. NULL or "" selects "/dev/shm/bench.bin" when
 *      SAVE_BY_DEFAULT is non-zero; otherwise nothing is written.
 *
 * Errors are reported on stderr; the function never aborts the benchmark.
 */
void save(complexf * d, int B, int N, const char * fn)
{
    const int limit = SAVE_LIMIT_MSPS * 1024 * 1024;
    int failed = 0;

    if (!fn || !fn[0])
    {
        if (! SAVE_BY_DEFAULT)
            return;
        fn = "/dev/shm/bench.bin";
    }
    /* A block size of 0 would never advance the write loop and a negative
     * one would walk backwards out of the buffer; a NULL buffer cannot be
     * written at all. */
    if (!d || B <= 0)
        return;

    FILE * f = fopen(fn, "wb");
    if (!f) {
        fprintf(stderr, "error writing result to %s\n", fn);
        return;
    }
    if ( N > limit )
        N = limit;
    for (int off = 0; off + B <= N; off += B)
    {
        /* stop at the first short write (e.g. disk full) */
        if (fwrite(d+off, sizeof(complexf), (size_t)B, f) != (size_t)B)
        {
            failed = 1;
            break;
        }
    }
    /* fclose() flushes buffered data, so its result matters as well */
    if (fclose(f) != 0)
        failed = 1;
    if (failed)
        fprintf(stderr, "error writing result to %s\n", fn);
}
127*3f1979aaSAndroid Build Coastguard Worker 
128*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_math_cc(int B,int N)129*3f1979aaSAndroid Build Coastguard Worker double bench_shift_math_cc(int B, int N) {
130*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
131*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
132*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
133*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
134*3f1979aaSAndroid Build Coastguard Worker     complexf *output = (complexf *)malloc(N * sizeof(complexf));
135*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
136*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
137*3f1979aaSAndroid Build Coastguard Worker 
138*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
139*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
140*3f1979aaSAndroid Build Coastguard Worker 
141*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
142*3f1979aaSAndroid Build Coastguard Worker     off = 0;
143*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
144*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
145*3f1979aaSAndroid Build Coastguard Worker     do {
146*3f1979aaSAndroid Build Coastguard Worker         // work
147*3f1979aaSAndroid Build Coastguard Worker         phase = shift_math_cc(input+off, output+off, B, -0.0009F, phase);
148*3f1979aaSAndroid Build Coastguard Worker         off += B;
149*3f1979aaSAndroid Build Coastguard Worker         ++iter;
150*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
151*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
152*3f1979aaSAndroid Build Coastguard Worker 
153*3f1979aaSAndroid Build Coastguard Worker     save(output, B, off, BENCH_FILE_SHIFT_MATH_CC);
154*3f1979aaSAndroid Build Coastguard Worker 
155*3f1979aaSAndroid Build Coastguard Worker     free(input);
156*3f1979aaSAndroid Build Coastguard Worker     free(output);
157*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
158*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
159*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
160*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
161*3f1979aaSAndroid Build Coastguard Worker }
162*3f1979aaSAndroid Build Coastguard Worker 
163*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_table_cc(int B,int N)164*3f1979aaSAndroid Build Coastguard Worker double bench_shift_table_cc(int B, int N) {
165*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
166*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
167*3f1979aaSAndroid Build Coastguard Worker     int table_size=65536;
168*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
169*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
170*3f1979aaSAndroid Build Coastguard Worker     complexf *output = (complexf *)malloc(N * sizeof(complexf));
171*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
172*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
173*3f1979aaSAndroid Build Coastguard Worker 
174*3f1979aaSAndroid Build Coastguard Worker     shift_table_data_t table_data = shift_table_init(table_size);
175*3f1979aaSAndroid Build Coastguard Worker 
176*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
177*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
178*3f1979aaSAndroid Build Coastguard Worker 
179*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
180*3f1979aaSAndroid Build Coastguard Worker     off = 0;
181*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
182*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
183*3f1979aaSAndroid Build Coastguard Worker     do {
184*3f1979aaSAndroid Build Coastguard Worker         // work
185*3f1979aaSAndroid Build Coastguard Worker         phase = shift_table_cc(input+off, output+off, B, -0.0009F, table_data, phase);
186*3f1979aaSAndroid Build Coastguard Worker 
187*3f1979aaSAndroid Build Coastguard Worker         off += B;
188*3f1979aaSAndroid Build Coastguard Worker         ++iter;
189*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
190*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
191*3f1979aaSAndroid Build Coastguard Worker 
192*3f1979aaSAndroid Build Coastguard Worker     save(output, B, off, NULL);
193*3f1979aaSAndroid Build Coastguard Worker     free(input);
194*3f1979aaSAndroid Build Coastguard Worker     free(output);
195*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
196*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
197*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
198*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
199*3f1979aaSAndroid Build Coastguard Worker }
200*3f1979aaSAndroid Build Coastguard Worker 
201*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_addfast(int B,int N)202*3f1979aaSAndroid Build Coastguard Worker double bench_shift_addfast(int B, int N) {
203*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
204*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
205*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
206*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
207*3f1979aaSAndroid Build Coastguard Worker     complexf *output = (complexf *)malloc(N * sizeof(complexf));
208*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
209*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
210*3f1979aaSAndroid Build Coastguard Worker     shift_addfast_data_t state = shift_addfast_init(-0.0009F);
211*3f1979aaSAndroid Build Coastguard Worker 
212*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
213*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
214*3f1979aaSAndroid Build Coastguard Worker 
215*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
216*3f1979aaSAndroid Build Coastguard Worker     off = 0;
217*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
218*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
219*3f1979aaSAndroid Build Coastguard Worker     do {
220*3f1979aaSAndroid Build Coastguard Worker         // work
221*3f1979aaSAndroid Build Coastguard Worker         phase = shift_addfast_cc(input+off, output+off, B, &state, phase);
222*3f1979aaSAndroid Build Coastguard Worker 
223*3f1979aaSAndroid Build Coastguard Worker         off += B;
224*3f1979aaSAndroid Build Coastguard Worker         ++iter;
225*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
226*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
227*3f1979aaSAndroid Build Coastguard Worker 
228*3f1979aaSAndroid Build Coastguard Worker     save(output, B, off, BENCH_FILE_ADD_FAST_CC);
229*3f1979aaSAndroid Build Coastguard Worker 
230*3f1979aaSAndroid Build Coastguard Worker     free(input);
231*3f1979aaSAndroid Build Coastguard Worker     free(output);
232*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
233*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
234*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
235*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
236*3f1979aaSAndroid Build Coastguard Worker }
237*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_addfast_inp(int B,int N)238*3f1979aaSAndroid Build Coastguard Worker double bench_shift_addfast_inp(int B, int N) {
239*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
240*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
241*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
242*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
243*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
244*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
245*3f1979aaSAndroid Build Coastguard Worker     shift_addfast_data_t state = shift_addfast_init(-0.0009F);
246*3f1979aaSAndroid Build Coastguard Worker 
247*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
248*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
249*3f1979aaSAndroid Build Coastguard Worker 
250*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
251*3f1979aaSAndroid Build Coastguard Worker     off = 0;
252*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
253*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
254*3f1979aaSAndroid Build Coastguard Worker     do {
255*3f1979aaSAndroid Build Coastguard Worker         // work
256*3f1979aaSAndroid Build Coastguard Worker         phase = shift_addfast_inp_c(input+off, B, &state, phase);
257*3f1979aaSAndroid Build Coastguard Worker 
258*3f1979aaSAndroid Build Coastguard Worker         off += B;
259*3f1979aaSAndroid Build Coastguard Worker         ++iter;
260*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
261*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
262*3f1979aaSAndroid Build Coastguard Worker 
263*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_ADD_FAST_INP_C);
264*3f1979aaSAndroid Build Coastguard Worker 
265*3f1979aaSAndroid Build Coastguard Worker     free(input);
266*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
267*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
268*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
269*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
270*3f1979aaSAndroid Build Coastguard Worker }
271*3f1979aaSAndroid Build Coastguard Worker 
272*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_unroll_oop(int B,int N)273*3f1979aaSAndroid Build Coastguard Worker double bench_shift_unroll_oop(int B, int N) {
274*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
275*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
276*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
277*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
278*3f1979aaSAndroid Build Coastguard Worker     complexf *output = (complexf *)malloc(N * sizeof(complexf));
279*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
280*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
281*3f1979aaSAndroid Build Coastguard Worker     shift_unroll_data_t state = shift_unroll_init(-0.0009F, B);
282*3f1979aaSAndroid Build Coastguard Worker 
283*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
284*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
285*3f1979aaSAndroid Build Coastguard Worker 
286*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
287*3f1979aaSAndroid Build Coastguard Worker     off = 0;
288*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
289*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
290*3f1979aaSAndroid Build Coastguard Worker     do {
291*3f1979aaSAndroid Build Coastguard Worker         // work
292*3f1979aaSAndroid Build Coastguard Worker         phase = shift_unroll_cc(input+off, output+off, B, &state, phase);
293*3f1979aaSAndroid Build Coastguard Worker 
294*3f1979aaSAndroid Build Coastguard Worker         off += B;
295*3f1979aaSAndroid Build Coastguard Worker         ++iter;
296*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
297*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
298*3f1979aaSAndroid Build Coastguard Worker 
299*3f1979aaSAndroid Build Coastguard Worker     save(output, B, off, NULL);
300*3f1979aaSAndroid Build Coastguard Worker     free(input);
301*3f1979aaSAndroid Build Coastguard Worker     free(output);
302*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
303*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
304*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
305*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
306*3f1979aaSAndroid Build Coastguard Worker }
307*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_unroll_inp(int B,int N)308*3f1979aaSAndroid Build Coastguard Worker double bench_shift_unroll_inp(int B, int N) {
309*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
310*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
311*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
312*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
313*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
314*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
315*3f1979aaSAndroid Build Coastguard Worker     shift_unroll_data_t state = shift_unroll_init(-0.0009F, B);
316*3f1979aaSAndroid Build Coastguard Worker 
317*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
318*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
319*3f1979aaSAndroid Build Coastguard Worker 
320*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
321*3f1979aaSAndroid Build Coastguard Worker     off = 0;
322*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
323*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
324*3f1979aaSAndroid Build Coastguard Worker     do {
325*3f1979aaSAndroid Build Coastguard Worker         // work
326*3f1979aaSAndroid Build Coastguard Worker         phase = shift_unroll_inp_c(input+off, B, &state, phase);
327*3f1979aaSAndroid Build Coastguard Worker 
328*3f1979aaSAndroid Build Coastguard Worker         off += B;
329*3f1979aaSAndroid Build Coastguard Worker         ++iter;
330*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
331*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
332*3f1979aaSAndroid Build Coastguard Worker 
333*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_UNROLL_INP_C);
334*3f1979aaSAndroid Build Coastguard Worker 
335*3f1979aaSAndroid Build Coastguard Worker     free(input);
336*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
337*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
338*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
339*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
340*3f1979aaSAndroid Build Coastguard Worker }
341*3f1979aaSAndroid Build Coastguard Worker 
342*3f1979aaSAndroid Build Coastguard Worker 
343*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_limited_unroll_oop(int B,int N)344*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_oop(int B, int N) {
345*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
346*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
347*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
348*3f1979aaSAndroid Build Coastguard Worker     complexf *output = (complexf *)malloc(N * sizeof(complexf));
349*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
350*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
351*3f1979aaSAndroid Build Coastguard Worker     shift_limited_unroll_data_t state = shift_limited_unroll_init(-0.0009F);
352*3f1979aaSAndroid Build Coastguard Worker 
353*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
354*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
355*3f1979aaSAndroid Build Coastguard Worker 
356*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
357*3f1979aaSAndroid Build Coastguard Worker     off = 0;
358*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
359*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
360*3f1979aaSAndroid Build Coastguard Worker     do {
361*3f1979aaSAndroid Build Coastguard Worker         // work
362*3f1979aaSAndroid Build Coastguard Worker         shift_limited_unroll_cc(input+off, output+off, B, &state);
363*3f1979aaSAndroid Build Coastguard Worker 
364*3f1979aaSAndroid Build Coastguard Worker         off += B;
365*3f1979aaSAndroid Build Coastguard Worker         ++iter;
366*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
367*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
368*3f1979aaSAndroid Build Coastguard Worker 
369*3f1979aaSAndroid Build Coastguard Worker     save(output, B, off, NULL);
370*3f1979aaSAndroid Build Coastguard Worker     free(input);
371*3f1979aaSAndroid Build Coastguard Worker     free(output);
372*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
373*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
374*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
375*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
376*3f1979aaSAndroid Build Coastguard Worker }
377*3f1979aaSAndroid Build Coastguard Worker 
378*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_limited_unroll_inp(int B,int N)379*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_inp(int B, int N) {
380*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
381*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
382*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
383*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
384*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
385*3f1979aaSAndroid Build Coastguard Worker     shift_limited_unroll_data_t state = shift_limited_unroll_init(-0.0009F);
386*3f1979aaSAndroid Build Coastguard Worker 
387*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
388*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
389*3f1979aaSAndroid Build Coastguard Worker 
390*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
391*3f1979aaSAndroid Build Coastguard Worker     off = 0;
392*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
393*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
394*3f1979aaSAndroid Build Coastguard Worker     do {
395*3f1979aaSAndroid Build Coastguard Worker         // work
396*3f1979aaSAndroid Build Coastguard Worker         shift_limited_unroll_inp_c(input+off, B, &state);
397*3f1979aaSAndroid Build Coastguard Worker 
398*3f1979aaSAndroid Build Coastguard Worker         off += B;
399*3f1979aaSAndroid Build Coastguard Worker         ++iter;
400*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
401*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
402*3f1979aaSAndroid Build Coastguard Worker 
403*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_LTD_UNROLL_INP_C);
404*3f1979aaSAndroid Build Coastguard Worker 
405*3f1979aaSAndroid Build Coastguard Worker     free(input);
406*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
407*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
408*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
409*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
410*3f1979aaSAndroid Build Coastguard Worker }
411*3f1979aaSAndroid Build Coastguard Worker 
412*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_limited_unroll_A_sse_inp(int B,int N)413*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_A_sse_inp(int B, int N) {
414*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
415*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
416*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
417*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
418*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
419*3f1979aaSAndroid Build Coastguard Worker     shift_limited_unroll_A_sse_data_t *state = malloc(sizeof(shift_limited_unroll_A_sse_data_t));
420*3f1979aaSAndroid Build Coastguard Worker 
421*3f1979aaSAndroid Build Coastguard Worker     *state = shift_limited_unroll_A_sse_init(-0.0009F, 0.0F);
422*3f1979aaSAndroid Build Coastguard Worker 
423*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
424*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
425*3f1979aaSAndroid Build Coastguard Worker 
426*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
427*3f1979aaSAndroid Build Coastguard Worker     off = 0;
428*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
429*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
430*3f1979aaSAndroid Build Coastguard Worker     do {
431*3f1979aaSAndroid Build Coastguard Worker         // work
432*3f1979aaSAndroid Build Coastguard Worker         shift_limited_unroll_A_sse_inp_c(input+off, B, state);
433*3f1979aaSAndroid Build Coastguard Worker 
434*3f1979aaSAndroid Build Coastguard Worker         off += B;
435*3f1979aaSAndroid Build Coastguard Worker         ++iter;
436*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
437*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
438*3f1979aaSAndroid Build Coastguard Worker 
439*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_LTD_UNROLL_A_SSE_INP_C);
440*3f1979aaSAndroid Build Coastguard Worker 
441*3f1979aaSAndroid Build Coastguard Worker     free(input);
442*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
443*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
444*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
445*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
446*3f1979aaSAndroid Build Coastguard Worker }
447*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_limited_unroll_B_sse_inp(int B,int N)448*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_B_sse_inp(int B, int N) {
449*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
450*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
451*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
452*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
453*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
454*3f1979aaSAndroid Build Coastguard Worker     shift_limited_unroll_B_sse_data_t *state = malloc(sizeof(shift_limited_unroll_B_sse_data_t));
455*3f1979aaSAndroid Build Coastguard Worker 
456*3f1979aaSAndroid Build Coastguard Worker     *state = shift_limited_unroll_B_sse_init(-0.0009F, 0.0F);
457*3f1979aaSAndroid Build Coastguard Worker 
458*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
459*3f1979aaSAndroid Build Coastguard Worker     //shift_recursive_osc_init(0.0F, 0.0F, &gen_conf, &gen_state);
460*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
461*3f1979aaSAndroid Build Coastguard Worker 
462*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
463*3f1979aaSAndroid Build Coastguard Worker     off = 0;
464*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
465*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
466*3f1979aaSAndroid Build Coastguard Worker     do {
467*3f1979aaSAndroid Build Coastguard Worker         // work
468*3f1979aaSAndroid Build Coastguard Worker         shift_limited_unroll_B_sse_inp_c(input+off, B, state);
469*3f1979aaSAndroid Build Coastguard Worker 
470*3f1979aaSAndroid Build Coastguard Worker         off += B;
471*3f1979aaSAndroid Build Coastguard Worker         ++iter;
472*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
473*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
474*3f1979aaSAndroid Build Coastguard Worker 
475*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_LTD_UNROLL_B_SSE_INP_C);
476*3f1979aaSAndroid Build Coastguard Worker 
477*3f1979aaSAndroid Build Coastguard Worker     free(input);
478*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
479*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
480*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
481*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
482*3f1979aaSAndroid Build Coastguard Worker }
483*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_limited_unroll_C_sse_inp(int B,int N)484*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_C_sse_inp(int B, int N) {
485*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
486*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
487*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
488*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
489*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
490*3f1979aaSAndroid Build Coastguard Worker     shift_limited_unroll_C_sse_data_t *state = malloc(sizeof(shift_limited_unroll_C_sse_data_t));
491*3f1979aaSAndroid Build Coastguard Worker 
492*3f1979aaSAndroid Build Coastguard Worker     *state = shift_limited_unroll_C_sse_init(-0.0009F, 0.0F);
493*3f1979aaSAndroid Build Coastguard Worker 
494*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
495*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
496*3f1979aaSAndroid Build Coastguard Worker 
497*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
498*3f1979aaSAndroid Build Coastguard Worker     off = 0;
499*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
500*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
501*3f1979aaSAndroid Build Coastguard Worker     do {
502*3f1979aaSAndroid Build Coastguard Worker         // work
503*3f1979aaSAndroid Build Coastguard Worker         shift_limited_unroll_C_sse_inp_c(input+off, B, state);
504*3f1979aaSAndroid Build Coastguard Worker 
505*3f1979aaSAndroid Build Coastguard Worker         off += B;
506*3f1979aaSAndroid Build Coastguard Worker         ++iter;
507*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
508*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
509*3f1979aaSAndroid Build Coastguard Worker 
510*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_LTD_UNROLL_C_SSE_INP_C);
511*3f1979aaSAndroid Build Coastguard Worker 
512*3f1979aaSAndroid Build Coastguard Worker     free(input);
513*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
514*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
515*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
516*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
517*3f1979aaSAndroid Build Coastguard Worker }
518*3f1979aaSAndroid Build Coastguard Worker 
519*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_rec_osc_cc_oop(int B,int N)520*3f1979aaSAndroid Build Coastguard Worker double bench_shift_rec_osc_cc_oop(int B, int N) {
521*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
522*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
523*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
524*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
525*3f1979aaSAndroid Build Coastguard Worker     complexf *output = (complexf *)malloc(N * sizeof(complexf));
526*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state, shift_state;
527*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf, shift_conf;
528*3f1979aaSAndroid Build Coastguard Worker 
529*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(-0.0009F, 0.0F, &shift_conf, &shift_state);
530*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
531*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
532*3f1979aaSAndroid Build Coastguard Worker 
533*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
534*3f1979aaSAndroid Build Coastguard Worker     off = 0;
535*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
536*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
537*3f1979aaSAndroid Build Coastguard Worker     do {
538*3f1979aaSAndroid Build Coastguard Worker         // work
539*3f1979aaSAndroid Build Coastguard Worker         shift_recursive_osc_cc(input+off, output+off, B, &shift_conf, &shift_state);
540*3f1979aaSAndroid Build Coastguard Worker 
541*3f1979aaSAndroid Build Coastguard Worker         off += B;
542*3f1979aaSAndroid Build Coastguard Worker         ++iter;
543*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
544*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
545*3f1979aaSAndroid Build Coastguard Worker 
546*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_REC_OSC_CC);
547*3f1979aaSAndroid Build Coastguard Worker 
548*3f1979aaSAndroid Build Coastguard Worker     save(output, B, off, NULL);
549*3f1979aaSAndroid Build Coastguard Worker     free(input);
550*3f1979aaSAndroid Build Coastguard Worker     free(output);
551*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
552*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
553*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
554*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
555*3f1979aaSAndroid Build Coastguard Worker }
556*3f1979aaSAndroid Build Coastguard Worker 
557*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_rec_osc_cc_inp(int B,int N)558*3f1979aaSAndroid Build Coastguard Worker double bench_shift_rec_osc_cc_inp(int B, int N) {
559*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
560*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
561*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
562*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
563*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state, shift_state;
564*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf, shift_conf;
565*3f1979aaSAndroid Build Coastguard Worker 
566*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
567*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
568*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(-0.0009F, 0.0F, &shift_conf, &shift_state);
569*3f1979aaSAndroid Build Coastguard Worker 
570*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
571*3f1979aaSAndroid Build Coastguard Worker     off = 0;
572*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
573*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
574*3f1979aaSAndroid Build Coastguard Worker     do {
575*3f1979aaSAndroid Build Coastguard Worker         // work
576*3f1979aaSAndroid Build Coastguard Worker         shift_recursive_osc_inp_c(input+off, B, &shift_conf, &shift_state);
577*3f1979aaSAndroid Build Coastguard Worker 
578*3f1979aaSAndroid Build Coastguard Worker         off += B;
579*3f1979aaSAndroid Build Coastguard Worker         ++iter;
580*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
581*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
582*3f1979aaSAndroid Build Coastguard Worker 
583*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_REC_OSC_INP_C);
584*3f1979aaSAndroid Build Coastguard Worker     free(input);
585*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
586*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
587*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
588*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
589*3f1979aaSAndroid Build Coastguard Worker }
590*3f1979aaSAndroid Build Coastguard Worker 
591*3f1979aaSAndroid Build Coastguard Worker 
bench_shift_rec_osc_sse_c_inp(int B,int N)592*3f1979aaSAndroid Build Coastguard Worker double bench_shift_rec_osc_sse_c_inp(int B, int N) {
593*3f1979aaSAndroid Build Coastguard Worker     double t0, t1, tstop, T, nI;
594*3f1979aaSAndroid Build Coastguard Worker     int iter, off;
595*3f1979aaSAndroid Build Coastguard Worker     float phase = 0.0F;
596*3f1979aaSAndroid Build Coastguard Worker     complexf *input = (complexf *)malloc(N * sizeof(complexf));
597*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_t gen_state;
598*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_conf_t gen_conf;
599*3f1979aaSAndroid Build Coastguard Worker 
600*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_sse_t *shift_state = malloc(sizeof(shift_recursive_osc_sse_t));
601*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_sse_conf_t shift_conf;
602*3f1979aaSAndroid Build Coastguard Worker 
603*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
604*3f1979aaSAndroid Build Coastguard Worker     gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
605*3f1979aaSAndroid Build Coastguard Worker 
606*3f1979aaSAndroid Build Coastguard Worker     shift_recursive_osc_sse_init(-0.0009F, 0.0F, &shift_conf, shift_state);
607*3f1979aaSAndroid Build Coastguard Worker 
608*3f1979aaSAndroid Build Coastguard Worker     iter = 0;
609*3f1979aaSAndroid Build Coastguard Worker     off = 0;
610*3f1979aaSAndroid Build Coastguard Worker     t0 = uclock_sec(1);
611*3f1979aaSAndroid Build Coastguard Worker     tstop = t0 + 0.5;  /* benchmark duration: 500 ms */
612*3f1979aaSAndroid Build Coastguard Worker     do {
613*3f1979aaSAndroid Build Coastguard Worker         // work
614*3f1979aaSAndroid Build Coastguard Worker         shift_recursive_osc_sse_inp_c(input+off, B, &shift_conf, shift_state);
615*3f1979aaSAndroid Build Coastguard Worker 
616*3f1979aaSAndroid Build Coastguard Worker         off += B;
617*3f1979aaSAndroid Build Coastguard Worker         ++iter;
618*3f1979aaSAndroid Build Coastguard Worker         t1 = uclock_sec(0);
619*3f1979aaSAndroid Build Coastguard Worker     } while ( t1 < tstop && off + B < N );
620*3f1979aaSAndroid Build Coastguard Worker 
621*3f1979aaSAndroid Build Coastguard Worker     save(input, B, off, BENCH_FILE_REC_OSC_SSE_INP_C);
622*3f1979aaSAndroid Build Coastguard Worker     free(input);
623*3f1979aaSAndroid Build Coastguard Worker     T = ( t1 - t0 );  /* duration per fft() */
624*3f1979aaSAndroid Build Coastguard Worker     printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
625*3f1979aaSAndroid Build Coastguard Worker     nI = ((double)iter) * B;  /* number of iterations "normalized" to O(N) = N */
626*3f1979aaSAndroid Build Coastguard Worker     return (nI / T);    /* normalized iterations per second */
627*3f1979aaSAndroid Build Coastguard Worker }
628*3f1979aaSAndroid Build Coastguard Worker 
629*3f1979aaSAndroid Build Coastguard Worker 
630*3f1979aaSAndroid Build Coastguard Worker 
/*
 * Entry point: runs the mixer benchmarks enabled via the BENCH_* switches.
 *
 * Usage: <prog> [<blockLength in samples> [<total # of MSamples>] ]
 *
 * Without arguments the usage line is printed and the benchmarks run with the
 * defaults (8 kSamples per block, 64 MSamples in total). Arguments that are
 * not positive numbers, a total that would overflow the sample counter, or a
 * block length larger than the total only print the usage line.
 *
 * Always returns 0.
 */
int main(int argc, char **argv)
{
    double rt;

    // process up to 64 MSample (512 MByte) in blocks of 8 kSamples (=64 kByte)
    int B = 8 * 1024;
    int N = 64 * 1024 * 1024;
    int showUsage = 0;
    /* largest MSample count for which count * 1024 * 1024 still fits into a
     * 32-bit int (assumes int has at least 32 bits) */
    const long maxMSamples = 2047;

    if (argc == 1)
        showUsage = 1;

    /* out-of-range or non-numeric values are mapped to 0, which selects the
     * usage path below; strtol() clamps on overflow, so the range checks
     * are sufficient */
    if (1 < argc) {
        long blockLen = strtol(argv[1], NULL, 10);
        B = (blockLen > 0 && blockLen <= maxMSamples * 1024 * 1024) ? (int)blockLen : 0;
    }
    if (2 < argc) {
        long mSamples = strtol(argv[2], NULL, 10);
        N = (mSamples > 0 && mSamples <= maxMSamples) ? (int)mSamples * 1024 * 1024 : 0;
    }

    /* B > N must be rejected: every bench processes its first block of B
     * samples unconditionally on a buffer of N samples */
    if ( !B || !N || B > N || showUsage )
    {
        fprintf(stderr, "%s [<blockLength in samples> [<total # of MSamples>] ]\n", argv[0]);
        if ( !B || !N || B > N )
            return 0;
    }

    fprintf(stderr, "processing up to N = %d MSamples with blocke length of %d samples\n",
        N / (1024 * 1024), B );


#if BENCH_REF_TRIG_FUNC
    printf("\nstarting bench of shift_math_cc (out-of-place) with trig functions ..\n");
    rt = bench_shift_math_cc(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);
#endif

#if BENCH_OUT_OF_PLACE_ALGOS
    printf("starting bench of shift_table_cc (out-of-place) ..\n");
    rt = bench_shift_table_cc(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("starting bench of shift_addfast_cc (out-of-place) ..\n");
    rt = bench_shift_addfast(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("\nstarting bench of shift_unroll_cc (out-of-place) ..\n");
    rt = bench_shift_unroll_oop(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("\nstarting bench of shift_limited_unroll_cc (out-of-place) ..\n");
    rt = bench_shift_limited_unroll_oop(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("\nstarting bench of shift_recursive_osc_cc (out-of-place) ..\n");
    rt = bench_shift_rec_osc_cc_oop(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);
#endif

#if BENCH_INPLACE_ALGOS

    printf("starting bench of shift_addfast_inp_c in-place ..\n");
    rt = bench_shift_addfast_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("starting bench of shift_unroll_inp_c in-place ..\n");
    rt = bench_shift_unroll_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("starting bench of shift_limited_unroll_inp_c in-place ..\n");
    rt = bench_shift_limited_unroll_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    /* the SSE variants are only run when the library reports an SSE implementation */
    if ( have_sse_shift_mixer_impl() )
    {
        printf("starting bench of shift_limited_unroll_A_sse_inp_c in-place ..\n");
        rt = bench_shift_limited_unroll_A_sse_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);

        printf("starting bench of shift_limited_unroll_B_sse_inp_c in-place ..\n");
        rt = bench_shift_limited_unroll_B_sse_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);

        printf("starting bench of shift_limited_unroll_C_sse_inp_c in-place ..\n");
        rt = bench_shift_limited_unroll_C_sse_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);
    }

    printf("starting bench of shift_recursive_osc_cc in-place ..\n");
    rt = bench_shift_rec_osc_cc_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    if ( have_sse_shift_mixer_impl() )
    {
        printf("starting bench of shift_recursive_osc_sse_c in-place ..\n");
        rt = bench_shift_rec_osc_sse_c_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);
    }
#endif

    return 0;
}
730*3f1979aaSAndroid Build Coastguard Worker 
731