1*3f1979aaSAndroid Build Coastguard Worker /*
2*3f1979aaSAndroid Build Coastguard Worker Copyright (c) 2020 Hayati Ayguen ( [email protected] )
3*3f1979aaSAndroid Build Coastguard Worker
4*3f1979aaSAndroid Build Coastguard Worker bench for mixer algorithm/implementations
5*3f1979aaSAndroid Build Coastguard Worker
6*3f1979aaSAndroid Build Coastguard Worker */
7*3f1979aaSAndroid Build Coastguard Worker
8*3f1979aaSAndroid Build Coastguard Worker #include <pf_mixer.h>
9*3f1979aaSAndroid Build Coastguard Worker
10*3f1979aaSAndroid Build Coastguard Worker #include <math.h>
11*3f1979aaSAndroid Build Coastguard Worker #include <stdio.h>
12*3f1979aaSAndroid Build Coastguard Worker #include <stdlib.h>
13*3f1979aaSAndroid Build Coastguard Worker #include <time.h>
14*3f1979aaSAndroid Build Coastguard Worker #include <assert.h>
15*3f1979aaSAndroid Build Coastguard Worker #include <string.h>
16*3f1979aaSAndroid Build Coastguard Worker
17*3f1979aaSAndroid Build Coastguard Worker #define HAVE_SYS_TIMES
18*3f1979aaSAndroid Build Coastguard Worker
19*3f1979aaSAndroid Build Coastguard Worker #ifdef HAVE_SYS_TIMES
20*3f1979aaSAndroid Build Coastguard Worker # include <sys/times.h>
21*3f1979aaSAndroid Build Coastguard Worker # include <unistd.h>
22*3f1979aaSAndroid Build Coastguard Worker #endif
23*3f1979aaSAndroid Build Coastguard Worker
/* Switches for groups of benchmarks. They are not referenced in this part of
 * the file -- presumably evaluated by the benchmark driver further down;
 * TODO confirm. */
24*3f1979aaSAndroid Build Coastguard Worker #define BENCH_REF_TRIG_FUNC 1
25*3f1979aaSAndroid Build Coastguard Worker #define BENCH_OUT_OF_PLACE_ALGOS 0
26*3f1979aaSAndroid Build Coastguard Worker #define BENCH_INPLACE_ALGOS 1
27*3f1979aaSAndroid Build Coastguard Worker
/* Non-zero: save() called without a file name writes to "/dev/shm/bench.bin"
 * instead of returning without saving. */
28*3f1979aaSAndroid Build Coastguard Worker #define SAVE_BY_DEFAULT 0
/* save() caps the number of samples it writes at SAVE_LIMIT_MSPS * 1024 * 1024. */
29*3f1979aaSAndroid Build Coastguard Worker #define SAVE_LIMIT_MSPS 16
30*3f1979aaSAndroid Build Coastguard Worker
/* File names the individual benchmarks pass to save().
 * The first branch is disabled ("#if 0") and holds paths specific to one
 * developer machine. The active "#else" branch defines every name as "",
 * which save() treats the same as "no file name given". */
31*3f1979aaSAndroid Build Coastguard Worker #if 0
32*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_SHIFT_MATH_CC "/home/ayguen/WindowsDesktop/mixer_test/A_shift_math_cc.bin"
33*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_ADD_FAST_CC "/home/ayguen/WindowsDesktop/mixer_test/C_shift_addfast_cc.bin"
34*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_ADD_FAST_INP_C "/home/ayguen/WindowsDesktop/mixer_test/C_shift_addfast_inp_c.bin"
35*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_UNROLL_INP_C "/home/ayguen/WindowsDesktop/mixer_test/D_shift_unroll_inp_c.bin"
36*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_INP_C "/home/ayguen/WindowsDesktop/mixer_test/E_shift_limited_unroll_inp_c.bin"
37*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_A_SSE_INP_C "/home/ayguen/WindowsDesktop/mixer_test/F_shift_limited_unroll_A_sse_inp_c.bin"
38*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_B_SSE_INP_C "/home/ayguen/WindowsDesktop/mixer_test/G_shift_limited_unroll_B_sse_inp_c.bin"
39*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_C_SSE_INP_C "/home/ayguen/WindowsDesktop/mixer_test/H_shift_limited_unroll_C_sse_inp_c.bin"
40*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_REC_OSC_CC ""
41*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_REC_OSC_INP_C "/home/ayguen/WindowsDesktop/mixer_test/I_shift_recursive_osc_inp_c.bin"
42*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_REC_OSC_SSE_INP_C "/home/ayguen/WindowsDesktop/mixer_test/J_shift_recursive_osc_sse_inp_c.bin"
43*3f1979aaSAndroid Build Coastguard Worker #else
44*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_SHIFT_MATH_CC ""
45*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_ADD_FAST_CC ""
46*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_ADD_FAST_INP_C ""
47*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_UNROLL_INP_C ""
48*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_INP_C ""
49*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_A_SSE_INP_C ""
50*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_B_SSE_INP_C ""
51*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_LTD_UNROLL_C_SSE_INP_C ""
52*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_REC_OSC_CC ""
53*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_REC_OSC_INP_C ""
54*3f1979aaSAndroid Build Coastguard Worker #define BENCH_FILE_REC_OSC_SSE_INP_C ""
55*3f1979aaSAndroid Build Coastguard Worker #endif
56*3f1979aaSAndroid Build Coastguard Worker
57*3f1979aaSAndroid Build Coastguard Worker
58*3f1979aaSAndroid Build Coastguard Worker
59*3f1979aaSAndroid Build Coastguard Worker #if defined(HAVE_SYS_TIMES)
/* clock ticks per second as reported by sysconf(); 0 until the first query */
static double ttclk = 0.;

/*
 * Return the user CPU time consumed by this process, in seconds.
 *
 * find_start: when non-zero, keep polling times() until the user-time
 *             counter changes, so the returned value lies right at the
 *             start of a fresh clock tick.
 */
static double uclock_sec(int find_start)
{
  struct tms now;

  if (ttclk == 0.)
  {
    /* first call: look up the tick rate once and report it */
    ttclk = sysconf(_SC_CLK_TCK);
    fprintf(stderr, "sysconf(_SC_CLK_TCK) => %f\n", ttclk);
  }

  times(&now);
  if (find_start)
  {
    const struct tms first = now;
    do {
      times(&now);
    } while (first.tms_utime == now.tms_utime);
  }

  /* only the user time of this process is used - wall-clock time would
   * depend on the OS scheduler */
  return ((double)now.tms_utime) / ttclk;
}
80*3f1979aaSAndroid Build Coastguard Worker
81*3f1979aaSAndroid Build Coastguard Worker #elif 0
82*3f1979aaSAndroid Build Coastguard Worker // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-getprocesstimes
uclock_sec(int find_start)83*3f1979aaSAndroid Build Coastguard Worker double uclock_sec(int find_start)
84*3f1979aaSAndroid Build Coastguard Worker {
85*3f1979aaSAndroid Build Coastguard Worker FILETIME a, b, c, d;
86*3f1979aaSAndroid Build Coastguard Worker if (GetProcessTimes(GetCurrentProcess(), &a, &b, &c, &d) != 0)
87*3f1979aaSAndroid Build Coastguard Worker {
88*3f1979aaSAndroid Build Coastguard Worker // Returns total user time.
89*3f1979aaSAndroid Build Coastguard Worker // Can be tweaked to include kernel times as well.
90*3f1979aaSAndroid Build Coastguard Worker return
91*3f1979aaSAndroid Build Coastguard Worker (double)(d.dwLowDateTime |
92*3f1979aaSAndroid Build Coastguard Worker ((unsigned long long)d.dwHighDateTime << 32)) * 0.0000001;
93*3f1979aaSAndroid Build Coastguard Worker }
94*3f1979aaSAndroid Build Coastguard Worker else {
95*3f1979aaSAndroid Build Coastguard Worker // Handle error
96*3f1979aaSAndroid Build Coastguard Worker return 0;
97*3f1979aaSAndroid Build Coastguard Worker }
98*3f1979aaSAndroid Build Coastguard Worker }
99*3f1979aaSAndroid Build Coastguard Worker
100*3f1979aaSAndroid Build Coastguard Worker #else
/*
 * Portable fallback: processor time from clock(), converted to seconds.
 * find_start is not used by this variant.
 */
double uclock_sec(int find_start)
{
  const double ticks = (double)clock();
  return ticks / (double)CLOCKS_PER_SEC;
}
103*3f1979aaSAndroid Build Coastguard Worker #endif
104*3f1979aaSAndroid Build Coastguard Worker
105*3f1979aaSAndroid Build Coastguard Worker
/*
 * Write benchmark samples to a binary file, one block of B samples at a time.
 *
 * d:  samples to write.
 * B:  block size in samples; only whole blocks are written.
 * N:  number of valid samples in d; capped at SAVE_LIMIT_MSPS * 1024 * 1024.
 * fn: target file name. NULL or "" means: write to "/dev/shm/bench.bin" if
 *     SAVE_BY_DEFAULT is non-zero, otherwise do nothing.
 *
 * Nothing is written when d is NULL or B is not positive. Failures to open,
 * write or close the file are reported on stderr.
 */
void save(complexf * d, int B, int N, const char * fn)
{
  FILE * f;
  int off;
  int failed = 0;

  if (!fn || !fn[0])
  {
    if (! SAVE_BY_DEFAULT)
      return;
    fn = "/dev/shm/bench.bin";
  }
  /* a non-positive block size would never advance the write loop below */
  if (!d || B <= 0)
    return;

  f = fopen(fn, "wb");
  if (!f) {
    fprintf(stderr, "error writing result to %s\n", fn);
    return;
  }
  if ( N >= SAVE_LIMIT_MSPS * 1024 * 1024 )
    N = SAVE_LIMIT_MSPS * 1024 * 1024;
  for (off = 0; off + B <= N; off += B)
  {
    /* stop at the first short write instead of continuing on a broken stream */
    if (fwrite(d+off, sizeof(complexf), (size_t)B, f) != (size_t)B)
    {
      failed = 1;
      break;
    }
  }
  /* fclose() flushes buffered data, so its result matters as well */
  if (fclose(f) != 0)
    failed = 1;
  if (failed)
    fprintf(stderr, "error writing result to %s\n", fn);
}
127*3f1979aaSAndroid Build Coastguard Worker
128*3f1979aaSAndroid Build Coastguard Worker
bench_shift_math_cc(int B,int N)129*3f1979aaSAndroid Build Coastguard Worker double bench_shift_math_cc(int B, int N) {
130*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
131*3f1979aaSAndroid Build Coastguard Worker int iter, off;
132*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
133*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
134*3f1979aaSAndroid Build Coastguard Worker complexf *output = (complexf *)malloc(N * sizeof(complexf));
135*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
136*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
137*3f1979aaSAndroid Build Coastguard Worker
138*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
139*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
140*3f1979aaSAndroid Build Coastguard Worker
141*3f1979aaSAndroid Build Coastguard Worker iter = 0;
142*3f1979aaSAndroid Build Coastguard Worker off = 0;
143*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
144*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
145*3f1979aaSAndroid Build Coastguard Worker do {
146*3f1979aaSAndroid Build Coastguard Worker // work
147*3f1979aaSAndroid Build Coastguard Worker phase = shift_math_cc(input+off, output+off, B, -0.0009F, phase);
148*3f1979aaSAndroid Build Coastguard Worker off += B;
149*3f1979aaSAndroid Build Coastguard Worker ++iter;
150*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
151*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
152*3f1979aaSAndroid Build Coastguard Worker
153*3f1979aaSAndroid Build Coastguard Worker save(output, B, off, BENCH_FILE_SHIFT_MATH_CC);
154*3f1979aaSAndroid Build Coastguard Worker
155*3f1979aaSAndroid Build Coastguard Worker free(input);
156*3f1979aaSAndroid Build Coastguard Worker free(output);
157*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
158*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
159*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
160*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
161*3f1979aaSAndroid Build Coastguard Worker }
162*3f1979aaSAndroid Build Coastguard Worker
163*3f1979aaSAndroid Build Coastguard Worker
bench_shift_table_cc(int B,int N)164*3f1979aaSAndroid Build Coastguard Worker double bench_shift_table_cc(int B, int N) {
165*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
166*3f1979aaSAndroid Build Coastguard Worker int iter, off;
167*3f1979aaSAndroid Build Coastguard Worker int table_size=65536;
168*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
169*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
170*3f1979aaSAndroid Build Coastguard Worker complexf *output = (complexf *)malloc(N * sizeof(complexf));
171*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
172*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
173*3f1979aaSAndroid Build Coastguard Worker
174*3f1979aaSAndroid Build Coastguard Worker shift_table_data_t table_data = shift_table_init(table_size);
175*3f1979aaSAndroid Build Coastguard Worker
176*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
177*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
178*3f1979aaSAndroid Build Coastguard Worker
179*3f1979aaSAndroid Build Coastguard Worker iter = 0;
180*3f1979aaSAndroid Build Coastguard Worker off = 0;
181*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
182*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
183*3f1979aaSAndroid Build Coastguard Worker do {
184*3f1979aaSAndroid Build Coastguard Worker // work
185*3f1979aaSAndroid Build Coastguard Worker phase = shift_table_cc(input+off, output+off, B, -0.0009F, table_data, phase);
186*3f1979aaSAndroid Build Coastguard Worker
187*3f1979aaSAndroid Build Coastguard Worker off += B;
188*3f1979aaSAndroid Build Coastguard Worker ++iter;
189*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
190*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
191*3f1979aaSAndroid Build Coastguard Worker
192*3f1979aaSAndroid Build Coastguard Worker save(output, B, off, NULL);
193*3f1979aaSAndroid Build Coastguard Worker free(input);
194*3f1979aaSAndroid Build Coastguard Worker free(output);
195*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
196*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
197*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
198*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
199*3f1979aaSAndroid Build Coastguard Worker }
200*3f1979aaSAndroid Build Coastguard Worker
201*3f1979aaSAndroid Build Coastguard Worker
bench_shift_addfast(int B,int N)202*3f1979aaSAndroid Build Coastguard Worker double bench_shift_addfast(int B, int N) {
203*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
204*3f1979aaSAndroid Build Coastguard Worker int iter, off;
205*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
206*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
207*3f1979aaSAndroid Build Coastguard Worker complexf *output = (complexf *)malloc(N * sizeof(complexf));
208*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
209*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
210*3f1979aaSAndroid Build Coastguard Worker shift_addfast_data_t state = shift_addfast_init(-0.0009F);
211*3f1979aaSAndroid Build Coastguard Worker
212*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
213*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
214*3f1979aaSAndroid Build Coastguard Worker
215*3f1979aaSAndroid Build Coastguard Worker iter = 0;
216*3f1979aaSAndroid Build Coastguard Worker off = 0;
217*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
218*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
219*3f1979aaSAndroid Build Coastguard Worker do {
220*3f1979aaSAndroid Build Coastguard Worker // work
221*3f1979aaSAndroid Build Coastguard Worker phase = shift_addfast_cc(input+off, output+off, B, &state, phase);
222*3f1979aaSAndroid Build Coastguard Worker
223*3f1979aaSAndroid Build Coastguard Worker off += B;
224*3f1979aaSAndroid Build Coastguard Worker ++iter;
225*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
226*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
227*3f1979aaSAndroid Build Coastguard Worker
228*3f1979aaSAndroid Build Coastguard Worker save(output, B, off, BENCH_FILE_ADD_FAST_CC);
229*3f1979aaSAndroid Build Coastguard Worker
230*3f1979aaSAndroid Build Coastguard Worker free(input);
231*3f1979aaSAndroid Build Coastguard Worker free(output);
232*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
233*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
234*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
235*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
236*3f1979aaSAndroid Build Coastguard Worker }
237*3f1979aaSAndroid Build Coastguard Worker
bench_shift_addfast_inp(int B,int N)238*3f1979aaSAndroid Build Coastguard Worker double bench_shift_addfast_inp(int B, int N) {
239*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
240*3f1979aaSAndroid Build Coastguard Worker int iter, off;
241*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
242*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
243*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
244*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
245*3f1979aaSAndroid Build Coastguard Worker shift_addfast_data_t state = shift_addfast_init(-0.0009F);
246*3f1979aaSAndroid Build Coastguard Worker
247*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
248*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
249*3f1979aaSAndroid Build Coastguard Worker
250*3f1979aaSAndroid Build Coastguard Worker iter = 0;
251*3f1979aaSAndroid Build Coastguard Worker off = 0;
252*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
253*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
254*3f1979aaSAndroid Build Coastguard Worker do {
255*3f1979aaSAndroid Build Coastguard Worker // work
256*3f1979aaSAndroid Build Coastguard Worker phase = shift_addfast_inp_c(input+off, B, &state, phase);
257*3f1979aaSAndroid Build Coastguard Worker
258*3f1979aaSAndroid Build Coastguard Worker off += B;
259*3f1979aaSAndroid Build Coastguard Worker ++iter;
260*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
261*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
262*3f1979aaSAndroid Build Coastguard Worker
263*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_ADD_FAST_INP_C);
264*3f1979aaSAndroid Build Coastguard Worker
265*3f1979aaSAndroid Build Coastguard Worker free(input);
266*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
267*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
268*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
269*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
270*3f1979aaSAndroid Build Coastguard Worker }
271*3f1979aaSAndroid Build Coastguard Worker
272*3f1979aaSAndroid Build Coastguard Worker
bench_shift_unroll_oop(int B,int N)273*3f1979aaSAndroid Build Coastguard Worker double bench_shift_unroll_oop(int B, int N) {
274*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
275*3f1979aaSAndroid Build Coastguard Worker int iter, off;
276*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
277*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
278*3f1979aaSAndroid Build Coastguard Worker complexf *output = (complexf *)malloc(N * sizeof(complexf));
279*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
280*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
281*3f1979aaSAndroid Build Coastguard Worker shift_unroll_data_t state = shift_unroll_init(-0.0009F, B);
282*3f1979aaSAndroid Build Coastguard Worker
283*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
284*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
285*3f1979aaSAndroid Build Coastguard Worker
286*3f1979aaSAndroid Build Coastguard Worker iter = 0;
287*3f1979aaSAndroid Build Coastguard Worker off = 0;
288*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
289*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
290*3f1979aaSAndroid Build Coastguard Worker do {
291*3f1979aaSAndroid Build Coastguard Worker // work
292*3f1979aaSAndroid Build Coastguard Worker phase = shift_unroll_cc(input+off, output+off, B, &state, phase);
293*3f1979aaSAndroid Build Coastguard Worker
294*3f1979aaSAndroid Build Coastguard Worker off += B;
295*3f1979aaSAndroid Build Coastguard Worker ++iter;
296*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
297*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
298*3f1979aaSAndroid Build Coastguard Worker
299*3f1979aaSAndroid Build Coastguard Worker save(output, B, off, NULL);
300*3f1979aaSAndroid Build Coastguard Worker free(input);
301*3f1979aaSAndroid Build Coastguard Worker free(output);
302*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
303*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
304*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
305*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
306*3f1979aaSAndroid Build Coastguard Worker }
307*3f1979aaSAndroid Build Coastguard Worker
bench_shift_unroll_inp(int B,int N)308*3f1979aaSAndroid Build Coastguard Worker double bench_shift_unroll_inp(int B, int N) {
309*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
310*3f1979aaSAndroid Build Coastguard Worker int iter, off;
311*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
312*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
313*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
314*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
315*3f1979aaSAndroid Build Coastguard Worker shift_unroll_data_t state = shift_unroll_init(-0.0009F, B);
316*3f1979aaSAndroid Build Coastguard Worker
317*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
318*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
319*3f1979aaSAndroid Build Coastguard Worker
320*3f1979aaSAndroid Build Coastguard Worker iter = 0;
321*3f1979aaSAndroid Build Coastguard Worker off = 0;
322*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
323*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
324*3f1979aaSAndroid Build Coastguard Worker do {
325*3f1979aaSAndroid Build Coastguard Worker // work
326*3f1979aaSAndroid Build Coastguard Worker phase = shift_unroll_inp_c(input+off, B, &state, phase);
327*3f1979aaSAndroid Build Coastguard Worker
328*3f1979aaSAndroid Build Coastguard Worker off += B;
329*3f1979aaSAndroid Build Coastguard Worker ++iter;
330*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
331*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
332*3f1979aaSAndroid Build Coastguard Worker
333*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_UNROLL_INP_C);
334*3f1979aaSAndroid Build Coastguard Worker
335*3f1979aaSAndroid Build Coastguard Worker free(input);
336*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
337*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
338*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
339*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
340*3f1979aaSAndroid Build Coastguard Worker }
341*3f1979aaSAndroid Build Coastguard Worker
342*3f1979aaSAndroid Build Coastguard Worker
343*3f1979aaSAndroid Build Coastguard Worker
bench_shift_limited_unroll_oop(int B,int N)344*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_oop(int B, int N) {
345*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
346*3f1979aaSAndroid Build Coastguard Worker int iter, off;
347*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
348*3f1979aaSAndroid Build Coastguard Worker complexf *output = (complexf *)malloc(N * sizeof(complexf));
349*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
350*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
351*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_data_t state = shift_limited_unroll_init(-0.0009F);
352*3f1979aaSAndroid Build Coastguard Worker
353*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
354*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
355*3f1979aaSAndroid Build Coastguard Worker
356*3f1979aaSAndroid Build Coastguard Worker iter = 0;
357*3f1979aaSAndroid Build Coastguard Worker off = 0;
358*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
359*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
360*3f1979aaSAndroid Build Coastguard Worker do {
361*3f1979aaSAndroid Build Coastguard Worker // work
362*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_cc(input+off, output+off, B, &state);
363*3f1979aaSAndroid Build Coastguard Worker
364*3f1979aaSAndroid Build Coastguard Worker off += B;
365*3f1979aaSAndroid Build Coastguard Worker ++iter;
366*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
367*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
368*3f1979aaSAndroid Build Coastguard Worker
369*3f1979aaSAndroid Build Coastguard Worker save(output, B, off, NULL);
370*3f1979aaSAndroid Build Coastguard Worker free(input);
371*3f1979aaSAndroid Build Coastguard Worker free(output);
372*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
373*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
374*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
375*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
376*3f1979aaSAndroid Build Coastguard Worker }
377*3f1979aaSAndroid Build Coastguard Worker
378*3f1979aaSAndroid Build Coastguard Worker
bench_shift_limited_unroll_inp(int B,int N)379*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_inp(int B, int N) {
380*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
381*3f1979aaSAndroid Build Coastguard Worker int iter, off;
382*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
383*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
384*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
385*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_data_t state = shift_limited_unroll_init(-0.0009F);
386*3f1979aaSAndroid Build Coastguard Worker
387*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
388*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
389*3f1979aaSAndroid Build Coastguard Worker
390*3f1979aaSAndroid Build Coastguard Worker iter = 0;
391*3f1979aaSAndroid Build Coastguard Worker off = 0;
392*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
393*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
394*3f1979aaSAndroid Build Coastguard Worker do {
395*3f1979aaSAndroid Build Coastguard Worker // work
396*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_inp_c(input+off, B, &state);
397*3f1979aaSAndroid Build Coastguard Worker
398*3f1979aaSAndroid Build Coastguard Worker off += B;
399*3f1979aaSAndroid Build Coastguard Worker ++iter;
400*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
401*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
402*3f1979aaSAndroid Build Coastguard Worker
403*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_LTD_UNROLL_INP_C);
404*3f1979aaSAndroid Build Coastguard Worker
405*3f1979aaSAndroid Build Coastguard Worker free(input);
406*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
407*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
408*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
409*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
410*3f1979aaSAndroid Build Coastguard Worker }
411*3f1979aaSAndroid Build Coastguard Worker
412*3f1979aaSAndroid Build Coastguard Worker
bench_shift_limited_unroll_A_sse_inp(int B,int N)413*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_A_sse_inp(int B, int N) {
414*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
415*3f1979aaSAndroid Build Coastguard Worker int iter, off;
416*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
417*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
418*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
419*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_A_sse_data_t *state = malloc(sizeof(shift_limited_unroll_A_sse_data_t));
420*3f1979aaSAndroid Build Coastguard Worker
421*3f1979aaSAndroid Build Coastguard Worker *state = shift_limited_unroll_A_sse_init(-0.0009F, 0.0F);
422*3f1979aaSAndroid Build Coastguard Worker
423*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
424*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
425*3f1979aaSAndroid Build Coastguard Worker
426*3f1979aaSAndroid Build Coastguard Worker iter = 0;
427*3f1979aaSAndroid Build Coastguard Worker off = 0;
428*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
429*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
430*3f1979aaSAndroid Build Coastguard Worker do {
431*3f1979aaSAndroid Build Coastguard Worker // work
432*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_A_sse_inp_c(input+off, B, state);
433*3f1979aaSAndroid Build Coastguard Worker
434*3f1979aaSAndroid Build Coastguard Worker off += B;
435*3f1979aaSAndroid Build Coastguard Worker ++iter;
436*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
437*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
438*3f1979aaSAndroid Build Coastguard Worker
439*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_LTD_UNROLL_A_SSE_INP_C);
440*3f1979aaSAndroid Build Coastguard Worker
441*3f1979aaSAndroid Build Coastguard Worker free(input);
442*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
443*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
444*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
445*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
446*3f1979aaSAndroid Build Coastguard Worker }
447*3f1979aaSAndroid Build Coastguard Worker
bench_shift_limited_unroll_B_sse_inp(int B,int N)448*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_B_sse_inp(int B, int N) {
449*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
450*3f1979aaSAndroid Build Coastguard Worker int iter, off;
451*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
452*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
453*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
454*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_B_sse_data_t *state = malloc(sizeof(shift_limited_unroll_B_sse_data_t));
455*3f1979aaSAndroid Build Coastguard Worker
456*3f1979aaSAndroid Build Coastguard Worker *state = shift_limited_unroll_B_sse_init(-0.0009F, 0.0F);
457*3f1979aaSAndroid Build Coastguard Worker
458*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
459*3f1979aaSAndroid Build Coastguard Worker //shift_recursive_osc_init(0.0F, 0.0F, &gen_conf, &gen_state);
460*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
461*3f1979aaSAndroid Build Coastguard Worker
462*3f1979aaSAndroid Build Coastguard Worker iter = 0;
463*3f1979aaSAndroid Build Coastguard Worker off = 0;
464*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
465*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
466*3f1979aaSAndroid Build Coastguard Worker do {
467*3f1979aaSAndroid Build Coastguard Worker // work
468*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_B_sse_inp_c(input+off, B, state);
469*3f1979aaSAndroid Build Coastguard Worker
470*3f1979aaSAndroid Build Coastguard Worker off += B;
471*3f1979aaSAndroid Build Coastguard Worker ++iter;
472*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
473*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
474*3f1979aaSAndroid Build Coastguard Worker
475*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_LTD_UNROLL_B_SSE_INP_C);
476*3f1979aaSAndroid Build Coastguard Worker
477*3f1979aaSAndroid Build Coastguard Worker free(input);
478*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
479*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
480*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
481*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
482*3f1979aaSAndroid Build Coastguard Worker }
483*3f1979aaSAndroid Build Coastguard Worker
bench_shift_limited_unroll_C_sse_inp(int B,int N)484*3f1979aaSAndroid Build Coastguard Worker double bench_shift_limited_unroll_C_sse_inp(int B, int N) {
485*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
486*3f1979aaSAndroid Build Coastguard Worker int iter, off;
487*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
488*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
489*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
490*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_C_sse_data_t *state = malloc(sizeof(shift_limited_unroll_C_sse_data_t));
491*3f1979aaSAndroid Build Coastguard Worker
492*3f1979aaSAndroid Build Coastguard Worker *state = shift_limited_unroll_C_sse_init(-0.0009F, 0.0F);
493*3f1979aaSAndroid Build Coastguard Worker
494*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
495*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
496*3f1979aaSAndroid Build Coastguard Worker
497*3f1979aaSAndroid Build Coastguard Worker iter = 0;
498*3f1979aaSAndroid Build Coastguard Worker off = 0;
499*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
500*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
501*3f1979aaSAndroid Build Coastguard Worker do {
502*3f1979aaSAndroid Build Coastguard Worker // work
503*3f1979aaSAndroid Build Coastguard Worker shift_limited_unroll_C_sse_inp_c(input+off, B, state);
504*3f1979aaSAndroid Build Coastguard Worker
505*3f1979aaSAndroid Build Coastguard Worker off += B;
506*3f1979aaSAndroid Build Coastguard Worker ++iter;
507*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
508*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
509*3f1979aaSAndroid Build Coastguard Worker
510*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_LTD_UNROLL_C_SSE_INP_C);
511*3f1979aaSAndroid Build Coastguard Worker
512*3f1979aaSAndroid Build Coastguard Worker free(input);
513*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
514*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
515*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
516*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
517*3f1979aaSAndroid Build Coastguard Worker }
518*3f1979aaSAndroid Build Coastguard Worker
519*3f1979aaSAndroid Build Coastguard Worker
bench_shift_rec_osc_cc_oop(int B,int N)520*3f1979aaSAndroid Build Coastguard Worker double bench_shift_rec_osc_cc_oop(int B, int N) {
521*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
522*3f1979aaSAndroid Build Coastguard Worker int iter, off;
523*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
524*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
525*3f1979aaSAndroid Build Coastguard Worker complexf *output = (complexf *)malloc(N * sizeof(complexf));
526*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state, shift_state;
527*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf, shift_conf;
528*3f1979aaSAndroid Build Coastguard Worker
529*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(-0.0009F, 0.0F, &shift_conf, &shift_state);
530*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
531*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
532*3f1979aaSAndroid Build Coastguard Worker
533*3f1979aaSAndroid Build Coastguard Worker iter = 0;
534*3f1979aaSAndroid Build Coastguard Worker off = 0;
535*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
536*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
537*3f1979aaSAndroid Build Coastguard Worker do {
538*3f1979aaSAndroid Build Coastguard Worker // work
539*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_cc(input+off, output+off, B, &shift_conf, &shift_state);
540*3f1979aaSAndroid Build Coastguard Worker
541*3f1979aaSAndroid Build Coastguard Worker off += B;
542*3f1979aaSAndroid Build Coastguard Worker ++iter;
543*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
544*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
545*3f1979aaSAndroid Build Coastguard Worker
546*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_REC_OSC_CC);
547*3f1979aaSAndroid Build Coastguard Worker
548*3f1979aaSAndroid Build Coastguard Worker save(output, B, off, NULL);
549*3f1979aaSAndroid Build Coastguard Worker free(input);
550*3f1979aaSAndroid Build Coastguard Worker free(output);
551*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
552*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
553*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
554*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
555*3f1979aaSAndroid Build Coastguard Worker }
556*3f1979aaSAndroid Build Coastguard Worker
557*3f1979aaSAndroid Build Coastguard Worker
bench_shift_rec_osc_cc_inp(int B,int N)558*3f1979aaSAndroid Build Coastguard Worker double bench_shift_rec_osc_cc_inp(int B, int N) {
559*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
560*3f1979aaSAndroid Build Coastguard Worker int iter, off;
561*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
562*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
563*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state, shift_state;
564*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf, shift_conf;
565*3f1979aaSAndroid Build Coastguard Worker
566*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
567*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
568*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(-0.0009F, 0.0F, &shift_conf, &shift_state);
569*3f1979aaSAndroid Build Coastguard Worker
570*3f1979aaSAndroid Build Coastguard Worker iter = 0;
571*3f1979aaSAndroid Build Coastguard Worker off = 0;
572*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
573*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
574*3f1979aaSAndroid Build Coastguard Worker do {
575*3f1979aaSAndroid Build Coastguard Worker // work
576*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_inp_c(input+off, B, &shift_conf, &shift_state);
577*3f1979aaSAndroid Build Coastguard Worker
578*3f1979aaSAndroid Build Coastguard Worker off += B;
579*3f1979aaSAndroid Build Coastguard Worker ++iter;
580*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
581*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
582*3f1979aaSAndroid Build Coastguard Worker
583*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_REC_OSC_INP_C);
584*3f1979aaSAndroid Build Coastguard Worker free(input);
585*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
586*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
587*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
588*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
589*3f1979aaSAndroid Build Coastguard Worker }
590*3f1979aaSAndroid Build Coastguard Worker
591*3f1979aaSAndroid Build Coastguard Worker
bench_shift_rec_osc_sse_c_inp(int B,int N)592*3f1979aaSAndroid Build Coastguard Worker double bench_shift_rec_osc_sse_c_inp(int B, int N) {
593*3f1979aaSAndroid Build Coastguard Worker double t0, t1, tstop, T, nI;
594*3f1979aaSAndroid Build Coastguard Worker int iter, off;
595*3f1979aaSAndroid Build Coastguard Worker float phase = 0.0F;
596*3f1979aaSAndroid Build Coastguard Worker complexf *input = (complexf *)malloc(N * sizeof(complexf));
597*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_t gen_state;
598*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_conf_t gen_conf;
599*3f1979aaSAndroid Build Coastguard Worker
600*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_sse_t *shift_state = malloc(sizeof(shift_recursive_osc_sse_t));
601*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_sse_conf_t shift_conf;
602*3f1979aaSAndroid Build Coastguard Worker
603*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_init(0.001F, 0.0F, &gen_conf, &gen_state);
604*3f1979aaSAndroid Build Coastguard Worker gen_recursive_osc_c(input, N, &gen_conf, &gen_state);
605*3f1979aaSAndroid Build Coastguard Worker
606*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_sse_init(-0.0009F, 0.0F, &shift_conf, shift_state);
607*3f1979aaSAndroid Build Coastguard Worker
608*3f1979aaSAndroid Build Coastguard Worker iter = 0;
609*3f1979aaSAndroid Build Coastguard Worker off = 0;
610*3f1979aaSAndroid Build Coastguard Worker t0 = uclock_sec(1);
611*3f1979aaSAndroid Build Coastguard Worker tstop = t0 + 0.5; /* benchmark duration: 500 ms */
612*3f1979aaSAndroid Build Coastguard Worker do {
613*3f1979aaSAndroid Build Coastguard Worker // work
614*3f1979aaSAndroid Build Coastguard Worker shift_recursive_osc_sse_inp_c(input+off, B, &shift_conf, shift_state);
615*3f1979aaSAndroid Build Coastguard Worker
616*3f1979aaSAndroid Build Coastguard Worker off += B;
617*3f1979aaSAndroid Build Coastguard Worker ++iter;
618*3f1979aaSAndroid Build Coastguard Worker t1 = uclock_sec(0);
619*3f1979aaSAndroid Build Coastguard Worker } while ( t1 < tstop && off + B < N );
620*3f1979aaSAndroid Build Coastguard Worker
621*3f1979aaSAndroid Build Coastguard Worker save(input, B, off, BENCH_FILE_REC_OSC_SSE_INP_C);
622*3f1979aaSAndroid Build Coastguard Worker free(input);
623*3f1979aaSAndroid Build Coastguard Worker T = ( t1 - t0 ); /* duration per fft() */
624*3f1979aaSAndroid Build Coastguard Worker printf("processed %f Msamples in %f ms\n", off * 1E-6, T*1E3);
625*3f1979aaSAndroid Build Coastguard Worker nI = ((double)iter) * B; /* number of iterations "normalized" to O(N) = N */
626*3f1979aaSAndroid Build Coastguard Worker return (nI / T); /* normalized iterations per second */
627*3f1979aaSAndroid Build Coastguard Worker }
628*3f1979aaSAndroid Build Coastguard Worker
629*3f1979aaSAndroid Build Coastguard Worker
630*3f1979aaSAndroid Build Coastguard Worker
/*
 * Parse a command line number that has to be in 1 .. maxValue.
 * Returns the value, or 0 when the text is not a number or is out of range
 * (strtol() yields 0 without digits and saturates on overflow).
 */
static int parse_positive_int(const char *text, long maxValue)
{
    const long value = strtol(text, NULL, 10);
    return (0 < value && value <= maxValue) ? (int)value : 0;
}

/*
 * Entry point: runs the mixer benchmarks selected by the BENCH_* macros.
 *
 * argv[1] (optional): block length B in samples
 * argv[2] (optional): total buffer size in MSamples (N = value * 1024 * 1024)
 *
 * Without arguments the usage line is printed and the defaults are used.
 * Arguments that are not positive, do not fit into an int, or describe a
 * block larger than the buffer print the usage line and end the program.
 * The exit status is always 0.
 */
int main(int argc, char **argv)
{
    double rt;

    // process up to 64 MSample (512 MByte) in blocks of 8 kSamples (=64 kByte)
    int B = 8 * 1024;
    int N = 64 * 1024 * 1024;
    int showUsage = 0;
    int validArgs;
    /* largest int value, derived without <limits.h> */
    const long intMax = (long)(~0u >> 1);

    if (argc == 1)
        showUsage = 1;

    if (1 < argc)
        B = parse_positive_int(argv[1], intMax);
    if (2 < argc) {
        /* bound the MSample count so the multiplication cannot overflow */
        N = parse_positive_int(argv[2], intMax / (1024L * 1024L)) * 1024 * 1024;
    }

    /* every bench processes one full block before checking its bounds */
    validArgs = (0 < B) && (0 < N) && (B <= N);

    if ( !validArgs || showUsage )
    {
        fprintf(stderr, "%s [<blockLength in samples> [<total # of MSamples>] ]\n", argv[0]);
        if ( !validArgs )
            return 0;
    }

    fprintf(stderr, "processing up to N = %d MSamples with blocke length of %d samples\n",
        N / (1024 * 1024), B );


#if BENCH_REF_TRIG_FUNC
    printf("\nstarting bench of shift_math_cc (out-of-place) with trig functions ..\n");
    rt = bench_shift_math_cc(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);
#endif

#if BENCH_OUT_OF_PLACE_ALGOS
    printf("starting bench of shift_table_cc (out-of-place) ..\n");
    rt = bench_shift_table_cc(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("starting bench of shift_addfast_cc (out-of-place) ..\n");
    rt = bench_shift_addfast(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("\nstarting bench of shift_unroll_cc (out-of-place) ..\n");
    rt = bench_shift_unroll_oop(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("\nstarting bench of shift_limited_unroll_cc (out-of-place) ..\n");
    rt = bench_shift_limited_unroll_oop(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("\nstarting bench of shift_recursive_osc_cc (out-of-place) ..\n");
    rt = bench_shift_rec_osc_cc_oop(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);
#endif

#if BENCH_INPLACE_ALGOS

    printf("starting bench of shift_addfast_inp_c in-place ..\n");
    rt = bench_shift_addfast_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("starting bench of shift_unroll_inp_c in-place ..\n");
    rt = bench_shift_unroll_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    printf("starting bench of shift_limited_unroll_inp_c in-place ..\n");
    rt = bench_shift_limited_unroll_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    if ( have_sse_shift_mixer_impl() )
    {
        printf("starting bench of shift_limited_unroll_A_sse_inp_c in-place ..\n");
        rt = bench_shift_limited_unroll_A_sse_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);

        printf("starting bench of shift_limited_unroll_B_sse_inp_c in-place ..\n");
        rt = bench_shift_limited_unroll_B_sse_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);

        printf("starting bench of shift_limited_unroll_C_sse_inp_c in-place ..\n");
        rt = bench_shift_limited_unroll_C_sse_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);
    }

    printf("starting bench of shift_recursive_osc_cc in-place ..\n");
    rt = bench_shift_rec_osc_cc_inp(B, N);
    printf("  %f MSamples/sec\n\n", rt * 1E-6);

    if ( have_sse_shift_mixer_impl() )
    {
        printf("starting bench of shift_recursive_osc_sse_c in-place ..\n");
        rt = bench_shift_rec_osc_sse_c_inp(B, N);
        printf("  %f MSamples/sec\n\n", rt * 1E-6);
    }
#endif

    return 0;
}
730*3f1979aaSAndroid Build Coastguard Worker
731