xref: /aosp_15_r20/external/fft2d/src/fft2d/fft2d/fftsg.c (revision fa0ad63f8b666836f56a823de546390a6e4ff4b6)
1 /*
2 Fast Fourier/Cosine/Sine Transform
3     dimension   :one
4     data length :power of 2
5     decimation  :frequency
6     radix       :split-radix
7     data        :inplace
8     table       :use
9 functions
10     cdft: Complex Discrete Fourier Transform
11     rdft: Real Discrete Fourier Transform
12     ddct: Discrete Cosine Transform
13     ddst: Discrete Sine Transform
14     dfct: Cosine Transform of RDFT (Real Symmetric DFT)
15     dfst: Sine Transform of RDFT (Real Anti-symmetric DFT)
16 function prototypes
17     void cdft(int, int, double *, int *, double *);
18     void rdft(int, int, double *, int *, double *);
19     void ddct(int, int, double *, int *, double *);
20     void ddst(int, int, double *, int *, double *);
21     void dfct(int, double *, double *, int *, double *);
22     void dfst(int, double *, double *, int *, double *);
23 macro definitions
24     USE_CDFT_PTHREADS : default=not defined
25         CDFT_THREADS_BEGIN_N  : must be >= 512, default=8192
26         CDFT_4THREADS_BEGIN_N : must be >= 512, default=65536
27     USE_CDFT_WINTHREADS : default=not defined
28         CDFT_THREADS_BEGIN_N  : must be >= 512, default=32768
29         CDFT_4THREADS_BEGIN_N : must be >= 512, default=524288
30 
31 
32 -------- Complex DFT (Discrete Fourier Transform) --------
33     [definition]
34         <case1>
35             X[k] = sum_j=0^n-1 x[j]*exp(2*pi*i*j*k/n), 0<=k<n
36         <case2>
37             X[k] = sum_j=0^n-1 x[j]*exp(-2*pi*i*j*k/n), 0<=k<n
38         (notes: sum_j=0^n-1 is a summation from j=0 to n-1)
39     [usage]
40         <case1>
41             ip[0] = 0; // first time only
42             cdft(2*n, 1, a, ip, w);
43         <case2>
44             ip[0] = 0; // first time only
45             cdft(2*n, -1, a, ip, w);
46     [parameters]
47         2*n            :data length (int)
48                         n >= 1, n = power of 2
49         a[0...2*n-1]   :input/output data (double *)
50                         input data
51                             a[2*j] = Re(x[j]),
52                             a[2*j+1] = Im(x[j]), 0<=j<n
53                         output data
54                             a[2*k] = Re(X[k]),
55                             a[2*k+1] = Im(X[k]), 0<=k<n
56         ip[0...*]      :work area for bit reversal (int *)
57                         length of ip >= 2+sqrt(n)
58                         strictly,
59                         length of ip >=
60                             2+(1<<(int)(log(n+0.5)/log(2))/2).
61                         ip[0],ip[1] are pointers of the cos/sin table.
62         w[0...n/2-1]   :cos/sin table (double *)
63                         w[],ip[] are initialized if ip[0] == 0.
64     [remark]
65         Inverse of
66             cdft(2*n, -1, a, ip, w);
67         is
68             cdft(2*n, 1, a, ip, w);
69             for (j = 0; j <= 2 * n - 1; j++) {
70                 a[j] *= 1.0 / n;
71             }
72         .
73 
74 
75 -------- Real DFT / Inverse of Real DFT --------
76     [definition]
77         <case1> RDFT
78             R[k] = sum_j=0^n-1 a[j]*cos(2*pi*j*k/n), 0<=k<=n/2
79             I[k] = sum_j=0^n-1 a[j]*sin(2*pi*j*k/n), 0<k<n/2
80         <case2> IRDFT (excluding scale)
81             a[k] = (R[0] + R[n/2]*cos(pi*k))/2 +
82                    sum_j=1^n/2-1 R[j]*cos(2*pi*j*k/n) +
83                    sum_j=1^n/2-1 I[j]*sin(2*pi*j*k/n), 0<=k<n
84     [usage]
85         <case1>
86             ip[0] = 0; // first time only
87             rdft(n, 1, a, ip, w);
88         <case2>
89             ip[0] = 0; // first time only
90             rdft(n, -1, a, ip, w);
91     [parameters]
92         n              :data length (int)
93                         n >= 2, n = power of 2
94         a[0...n-1]     :input/output data (double *)
95                         <case1>
96                             output data
97                                 a[2*k] = R[k], 0<=k<n/2
98                                 a[2*k+1] = I[k], 0<k<n/2
99                                 a[1] = R[n/2]
100                         <case2>
101                             input data
102                                 a[2*j] = R[j], 0<=j<n/2
103                                 a[2*j+1] = I[j], 0<j<n/2
104                                 a[1] = R[n/2]
105         ip[0...*]      :work area for bit reversal (int *)
106                         length of ip >= 2+sqrt(n/2)
107                         strictly,
108                         length of ip >=
109                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
110                         ip[0],ip[1] are pointers of the cos/sin table.
111         w[0...n/2-1]   :cos/sin table (double *)
112                         w[],ip[] are initialized if ip[0] == 0.
113     [remark]
114         Inverse of
115             rdft(n, 1, a, ip, w);
116         is
117             rdft(n, -1, a, ip, w);
118             for (j = 0; j <= n - 1; j++) {
119                 a[j] *= 2.0 / n;
120             }
121         .
122 
123 
124 -------- DCT (Discrete Cosine Transform) / Inverse of DCT --------
125     [definition]
126         <case1> IDCT (excluding scale)
127             C[k] = sum_j=0^n-1 a[j]*cos(pi*j*(k+1/2)/n), 0<=k<n
128         <case2> DCT
129             C[k] = sum_j=0^n-1 a[j]*cos(pi*(j+1/2)*k/n), 0<=k<n
130     [usage]
131         <case1>
132             ip[0] = 0; // first time only
133             ddct(n, 1, a, ip, w);
134         <case2>
135             ip[0] = 0; // first time only
136             ddct(n, -1, a, ip, w);
137     [parameters]
138         n              :data length (int)
139                         n >= 2, n = power of 2
140         a[0...n-1]     :input/output data (double *)
141                         output data
142                             a[k] = C[k], 0<=k<n
143         ip[0...*]      :work area for bit reversal (int *)
144                         length of ip >= 2+sqrt(n/2)
145                         strictly,
146                         length of ip >=
147                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
148                         ip[0],ip[1] are pointers of the cos/sin table.
149         w[0...n*5/4-1] :cos/sin table (double *)
150                         w[],ip[] are initialized if ip[0] == 0.
151     [remark]
152         Inverse of
153             ddct(n, -1, a, ip, w);
154         is
155             a[0] *= 0.5;
156             ddct(n, 1, a, ip, w);
157             for (j = 0; j <= n - 1; j++) {
158                 a[j] *= 2.0 / n;
159             }
160         .
161 
162 
163 -------- DST (Discrete Sine Transform) / Inverse of DST --------
164     [definition]
165         <case1> IDST (excluding scale)
166             S[k] = sum_j=1^n A[j]*sin(pi*j*(k+1/2)/n), 0<=k<n
167         <case2> DST
168             S[k] = sum_j=0^n-1 a[j]*sin(pi*(j+1/2)*k/n), 0<k<=n
169     [usage]
170         <case1>
171             ip[0] = 0; // first time only
172             ddst(n, 1, a, ip, w);
173         <case2>
174             ip[0] = 0; // first time only
175             ddst(n, -1, a, ip, w);
176     [parameters]
177         n              :data length (int)
178                         n >= 2, n = power of 2
179         a[0...n-1]     :input/output data (double *)
180                         <case1>
181                             input data
182                                 a[j] = A[j], 0<j<n
183                                 a[0] = A[n]
184                             output data
185                                 a[k] = S[k], 0<=k<n
186                         <case2>
187                             output data
188                                 a[k] = S[k], 0<k<n
189                                 a[0] = S[n]
190         ip[0...*]      :work area for bit reversal (int *)
191                         length of ip >= 2+sqrt(n/2)
192                         strictly,
193                         length of ip >=
194                             2+(1<<(int)(log(n/2+0.5)/log(2))/2).
195                         ip[0],ip[1] are pointers of the cos/sin table.
196         w[0...n*5/4-1] :cos/sin table (double *)
197                         w[],ip[] are initialized if ip[0] == 0.
198     [remark]
199         Inverse of
200             ddst(n, -1, a, ip, w);
201         is
202             a[0] *= 0.5;
203             ddst(n, 1, a, ip, w);
204             for (j = 0; j <= n - 1; j++) {
205                 a[j] *= 2.0 / n;
206             }
207         .
208 
209 
210 -------- Cosine Transform of RDFT (Real Symmetric DFT) --------
211     [definition]
212         C[k] = sum_j=0^n a[j]*cos(pi*j*k/n), 0<=k<=n
213     [usage]
214         ip[0] = 0; // first time only
215         dfct(n, a, t, ip, w);
216     [parameters]
217         n              :data length - 1 (int)
218                         n >= 2, n = power of 2
219         a[0...n]       :input/output data (double *)
220                         output data
221                             a[k] = C[k], 0<=k<=n
222         t[0...n/2]     :work area (double *)
223         ip[0...*]      :work area for bit reversal (int *)
224                         length of ip >= 2+sqrt(n/4)
225                         strictly,
226                         length of ip >=
227                             2+(1<<(int)(log(n/4+0.5)/log(2))/2).
228                         ip[0],ip[1] are pointers of the cos/sin table.
229         w[0...n*5/8-1] :cos/sin table (double *)
230                         w[],ip[] are initialized if ip[0] == 0.
231     [remark]
232         Inverse of
233             a[0] *= 0.5;
234             a[n] *= 0.5;
235             dfct(n, a, t, ip, w);
236         is
237             a[0] *= 0.5;
238             a[n] *= 0.5;
239             dfct(n, a, t, ip, w);
240             for (j = 0; j <= n; j++) {
241                 a[j] *= 2.0 / n;
242             }
243         .
244 
245 
246 -------- Sine Transform of RDFT (Real Anti-symmetric DFT) --------
247     [definition]
248         S[k] = sum_j=1^n-1 a[j]*sin(pi*j*k/n), 0<k<n
249     [usage]
250         ip[0] = 0; // first time only
251         dfst(n, a, t, ip, w);
252     [parameters]
253         n              :data length + 1 (int)
254                         n >= 2, n = power of 2
255         a[0...n-1]     :input/output data (double *)
256                         output data
257                             a[k] = S[k], 0<k<n
258                         (a[0] is used for work area)
259         t[0...n/2-1]   :work area (double *)
260         ip[0...*]      :work area for bit reversal (int *)
261                         length of ip >= 2+sqrt(n/4)
262                         strictly,
263                         length of ip >=
264                             2+(1<<(int)(log(n/4+0.5)/log(2))/2).
265                         ip[0],ip[1] are pointers of the cos/sin table.
266         w[0...n*5/8-1] :cos/sin table (double *)
267                         w[],ip[] are initialized if ip[0] == 0.
268     [remark]
269         Inverse of
270             dfst(n, a, t, ip, w);
271         is
272             dfst(n, a, t, ip, w);
273             for (j = 1; j <= n - 1; j++) {
274                 a[j] *= 2.0 / n;
275             }
276         .
277 
278 
279 Appendix :
280     The cos/sin table is recalculated when the larger table required.
281     w[] and ip[] are compatible with all routines.
282 */
283 
284 
cdft(int n,int isgn,double * a,int * ip,double * w)285 void cdft(int n, int isgn, double *a, int *ip, double *w)
286 {
287     void makewt(int nw, int *ip, double *w);
288     void cftfsub(int n, double *a, int *ip, int nw, double *w);
289     void cftbsub(int n, double *a, int *ip, int nw, double *w);
290     int nw;
291 
292     nw = ip[0];
293     if (n > (nw << 2)) {
294         nw = n >> 2;
295         makewt(nw, ip, w);
296     }
297     if (isgn >= 0) {
298         cftfsub(n, a, ip, nw, w);
299     } else {
300         cftbsub(n, a, ip, nw, w);
301     }
302 }
303 
304 
rdft(int n,int isgn,double * a,int * ip,double * w)305 void rdft(int n, int isgn, double *a, int *ip, double *w)
306 {
307     void makewt(int nw, int *ip, double *w);
308     void makect(int nc, int *ip, double *c);
309     void cftfsub(int n, double *a, int *ip, int nw, double *w);
310     void cftbsub(int n, double *a, int *ip, int nw, double *w);
311     void rftfsub(int n, double *a, int nc, double *c);
312     void rftbsub(int n, double *a, int nc, double *c);
313     int nw, nc;
314     double xi;
315 
316     nw = ip[0];
317     if (n > (nw << 2)) {
318         nw = n >> 2;
319         makewt(nw, ip, w);
320     }
321     nc = ip[1];
322     if (n > (nc << 2)) {
323         nc = n >> 2;
324         makect(nc, ip, w + nw);
325     }
326     if (isgn >= 0) {
327         if (n > 4) {
328             cftfsub(n, a, ip, nw, w);
329             rftfsub(n, a, nc, w + nw);
330         } else if (n == 4) {
331             cftfsub(n, a, ip, nw, w);
332         }
333         xi = a[0] - a[1];
334         a[0] += a[1];
335         a[1] = xi;
336     } else {
337         a[1] = 0.5 * (a[0] - a[1]);
338         a[0] -= a[1];
339         if (n > 4) {
340             rftbsub(n, a, nc, w + nw);
341             cftbsub(n, a, ip, nw, w);
342         } else if (n == 4) {
343             cftbsub(n, a, ip, nw, w);
344         }
345     }
346 }
347 
348 
ddct(int n,int isgn,double * a,int * ip,double * w)349 void ddct(int n, int isgn, double *a, int *ip, double *w)
350 {
351     void makewt(int nw, int *ip, double *w);
352     void makect(int nc, int *ip, double *c);
353     void cftfsub(int n, double *a, int *ip, int nw, double *w);
354     void cftbsub(int n, double *a, int *ip, int nw, double *w);
355     void rftfsub(int n, double *a, int nc, double *c);
356     void rftbsub(int n, double *a, int nc, double *c);
357     void dctsub(int n, double *a, int nc, double *c);
358     int j, nw, nc;
359     double xr;
360 
361     nw = ip[0];
362     if (n > (nw << 2)) {
363         nw = n >> 2;
364         makewt(nw, ip, w);
365     }
366     nc = ip[1];
367     if (n > nc) {
368         nc = n;
369         makect(nc, ip, w + nw);
370     }
371     if (isgn < 0) {
372         xr = a[n - 1];
373         for (j = n - 2; j >= 2; j -= 2) {
374             a[j + 1] = a[j] - a[j - 1];
375             a[j] += a[j - 1];
376         }
377         a[1] = a[0] - xr;
378         a[0] += xr;
379         if (n > 4) {
380             rftbsub(n, a, nc, w + nw);
381             cftbsub(n, a, ip, nw, w);
382         } else if (n == 4) {
383             cftbsub(n, a, ip, nw, w);
384         }
385     }
386     dctsub(n, a, nc, w + nw);
387     if (isgn >= 0) {
388         if (n > 4) {
389             cftfsub(n, a, ip, nw, w);
390             rftfsub(n, a, nc, w + nw);
391         } else if (n == 4) {
392             cftfsub(n, a, ip, nw, w);
393         }
394         xr = a[0] - a[1];
395         a[0] += a[1];
396         for (j = 2; j < n; j += 2) {
397             a[j - 1] = a[j] - a[j + 1];
398             a[j] += a[j + 1];
399         }
400         a[n - 1] = xr;
401     }
402 }
403 
404 
ddst(int n,int isgn,double * a,int * ip,double * w)405 void ddst(int n, int isgn, double *a, int *ip, double *w)
406 {
407     void makewt(int nw, int *ip, double *w);
408     void makect(int nc, int *ip, double *c);
409     void cftfsub(int n, double *a, int *ip, int nw, double *w);
410     void cftbsub(int n, double *a, int *ip, int nw, double *w);
411     void rftfsub(int n, double *a, int nc, double *c);
412     void rftbsub(int n, double *a, int nc, double *c);
413     void dstsub(int n, double *a, int nc, double *c);
414     int j, nw, nc;
415     double xr;
416 
417     nw = ip[0];
418     if (n > (nw << 2)) {
419         nw = n >> 2;
420         makewt(nw, ip, w);
421     }
422     nc = ip[1];
423     if (n > nc) {
424         nc = n;
425         makect(nc, ip, w + nw);
426     }
427     if (isgn < 0) {
428         xr = a[n - 1];
429         for (j = n - 2; j >= 2; j -= 2) {
430             a[j + 1] = -a[j] - a[j - 1];
431             a[j] -= a[j - 1];
432         }
433         a[1] = a[0] + xr;
434         a[0] -= xr;
435         if (n > 4) {
436             rftbsub(n, a, nc, w + nw);
437             cftbsub(n, a, ip, nw, w);
438         } else if (n == 4) {
439             cftbsub(n, a, ip, nw, w);
440         }
441     }
442     dstsub(n, a, nc, w + nw);
443     if (isgn >= 0) {
444         if (n > 4) {
445             cftfsub(n, a, ip, nw, w);
446             rftfsub(n, a, nc, w + nw);
447         } else if (n == 4) {
448             cftfsub(n, a, ip, nw, w);
449         }
450         xr = a[0] - a[1];
451         a[0] += a[1];
452         for (j = 2; j < n; j += 2) {
453             a[j - 1] = -a[j] - a[j + 1];
454             a[j] -= a[j + 1];
455         }
456         a[n - 1] = -xr;
457     }
458 }
459 
460 
dfct(int n,double * a,double * t,int * ip,double * w)461 void dfct(int n, double *a, double *t, int *ip, double *w)
462 {
463     void makewt(int nw, int *ip, double *w);
464     void makect(int nc, int *ip, double *c);
465     void cftfsub(int n, double *a, int *ip, int nw, double *w);
466     void rftfsub(int n, double *a, int nc, double *c);
467     void dctsub(int n, double *a, int nc, double *c);
468     int j, k, l, m, mh, nw, nc;
469     double xr, xi, yr, yi;
470 
471     nw = ip[0];
472     if (n > (nw << 3)) {
473         nw = n >> 3;
474         makewt(nw, ip, w);
475     }
476     nc = ip[1];
477     if (n > (nc << 1)) {
478         nc = n >> 1;
479         makect(nc, ip, w + nw);
480     }
481     m = n >> 1;
482     yi = a[m];
483     xi = a[0] + a[n];
484     a[0] -= a[n];
485     t[0] = xi - yi;
486     t[m] = xi + yi;
487     if (n > 2) {
488         mh = m >> 1;
489         for (j = 1; j < mh; j++) {
490             k = m - j;
491             xr = a[j] - a[n - j];
492             xi = a[j] + a[n - j];
493             yr = a[k] - a[n - k];
494             yi = a[k] + a[n - k];
495             a[j] = xr;
496             a[k] = yr;
497             t[j] = xi - yi;
498             t[k] = xi + yi;
499         }
500         t[mh] = a[mh] + a[n - mh];
501         a[mh] -= a[n - mh];
502         dctsub(m, a, nc, w + nw);
503         if (m > 4) {
504             cftfsub(m, a, ip, nw, w);
505             rftfsub(m, a, nc, w + nw);
506         } else if (m == 4) {
507             cftfsub(m, a, ip, nw, w);
508         }
509         a[n - 1] = a[0] - a[1];
510         a[1] = a[0] + a[1];
511         for (j = m - 2; j >= 2; j -= 2) {
512             a[2 * j + 1] = a[j] + a[j + 1];
513             a[2 * j - 1] = a[j] - a[j + 1];
514         }
515         l = 2;
516         m = mh;
517         while (m >= 2) {
518             dctsub(m, t, nc, w + nw);
519             if (m > 4) {
520                 cftfsub(m, t, ip, nw, w);
521                 rftfsub(m, t, nc, w + nw);
522             } else if (m == 4) {
523                 cftfsub(m, t, ip, nw, w);
524             }
525             a[n - l] = t[0] - t[1];
526             a[l] = t[0] + t[1];
527             k = 0;
528             for (j = 2; j < m; j += 2) {
529                 k += l << 2;
530                 a[k - l] = t[j] - t[j + 1];
531                 a[k + l] = t[j] + t[j + 1];
532             }
533             l <<= 1;
534             mh = m >> 1;
535             for (j = 0; j < mh; j++) {
536                 k = m - j;
537                 t[j] = t[m + k] - t[m + j];
538                 t[k] = t[m + k] + t[m + j];
539             }
540             t[mh] = t[m + mh];
541             m = mh;
542         }
543         a[l] = t[0];
544         a[n] = t[2] - t[1];
545         a[0] = t[2] + t[1];
546     } else {
547         a[1] = a[0];
548         a[2] = t[0];
549         a[0] = t[1];
550     }
551 }
552 
553 
dfst(int n,double * a,double * t,int * ip,double * w)554 void dfst(int n, double *a, double *t, int *ip, double *w)
555 {
556     void makewt(int nw, int *ip, double *w);
557     void makect(int nc, int *ip, double *c);
558     void cftfsub(int n, double *a, int *ip, int nw, double *w);
559     void rftfsub(int n, double *a, int nc, double *c);
560     void dstsub(int n, double *a, int nc, double *c);
561     int j, k, l, m, mh, nw, nc;
562     double xr, xi, yr, yi;
563 
564     nw = ip[0];
565     if (n > (nw << 3)) {
566         nw = n >> 3;
567         makewt(nw, ip, w);
568     }
569     nc = ip[1];
570     if (n > (nc << 1)) {
571         nc = n >> 1;
572         makect(nc, ip, w + nw);
573     }
574     if (n > 2) {
575         m = n >> 1;
576         mh = m >> 1;
577         for (j = 1; j < mh; j++) {
578             k = m - j;
579             xr = a[j] + a[n - j];
580             xi = a[j] - a[n - j];
581             yr = a[k] + a[n - k];
582             yi = a[k] - a[n - k];
583             a[j] = xr;
584             a[k] = yr;
585             t[j] = xi + yi;
586             t[k] = xi - yi;
587         }
588         t[0] = a[mh] - a[n - mh];
589         a[mh] += a[n - mh];
590         a[0] = a[m];
591         dstsub(m, a, nc, w + nw);
592         if (m > 4) {
593             cftfsub(m, a, ip, nw, w);
594             rftfsub(m, a, nc, w + nw);
595         } else if (m == 4) {
596             cftfsub(m, a, ip, nw, w);
597         }
598         a[n - 1] = a[1] - a[0];
599         a[1] = a[0] + a[1];
600         for (j = m - 2; j >= 2; j -= 2) {
601             a[2 * j + 1] = a[j] - a[j + 1];
602             a[2 * j - 1] = -a[j] - a[j + 1];
603         }
604         l = 2;
605         m = mh;
606         while (m >= 2) {
607             dstsub(m, t, nc, w + nw);
608             if (m > 4) {
609                 cftfsub(m, t, ip, nw, w);
610                 rftfsub(m, t, nc, w + nw);
611             } else if (m == 4) {
612                 cftfsub(m, t, ip, nw, w);
613             }
614             a[n - l] = t[1] - t[0];
615             a[l] = t[0] + t[1];
616             k = 0;
617             for (j = 2; j < m; j += 2) {
618                 k += l << 2;
619                 a[k - l] = -t[j] - t[j + 1];
620                 a[k + l] = t[j] - t[j + 1];
621             }
622             l <<= 1;
623             mh = m >> 1;
624             for (j = 1; j < mh; j++) {
625                 k = m - j;
626                 t[j] = t[m + k] + t[m + j];
627                 t[k] = t[m + k] - t[m + j];
628             }
629             t[0] = t[m + mh];
630             m = mh;
631         }
632         a[l] = t[0];
633     }
634     a[0] = 0;
635 }
636 
637 
638 /* -------- initializing routines -------- */
639 
640 
641 #include <math.h>
642 
makewt(int nw,int * ip,double * w)643 void makewt(int nw, int *ip, double *w)
644 {
645     void makeipt(int nw, int *ip);
646     int j, nwh, nw0, nw1;
647     double delta, wn4r, wk1r, wk1i, wk3r, wk3i;
648 
649     ip[0] = nw;
650     ip[1] = 1;
651     if (nw > 2) {
652         nwh = nw >> 1;
653         delta = atan(1.0) / nwh;
654         wn4r = cos(delta * nwh);
655         w[0] = 1;
656         w[1] = wn4r;
657         if (nwh == 4) {
658             w[2] = cos(delta * 2);
659             w[3] = sin(delta * 2);
660         } else if (nwh > 4) {
661             makeipt(nw, ip);
662             w[2] = 0.5 / cos(delta * 2);
663             w[3] = 0.5 / cos(delta * 6);
664             for (j = 4; j < nwh; j += 4) {
665                 w[j] = cos(delta * j);
666                 w[j + 1] = sin(delta * j);
667                 w[j + 2] = cos(3 * delta * j);
668                 w[j + 3] = -sin(3 * delta * j);
669             }
670         }
671         nw0 = 0;
672         while (nwh > 2) {
673             nw1 = nw0 + nwh;
674             nwh >>= 1;
675             w[nw1] = 1;
676             w[nw1 + 1] = wn4r;
677             if (nwh == 4) {
678                 wk1r = w[nw0 + 4];
679                 wk1i = w[nw0 + 5];
680                 w[nw1 + 2] = wk1r;
681                 w[nw1 + 3] = wk1i;
682             } else if (nwh > 4) {
683                 wk1r = w[nw0 + 4];
684                 wk3r = w[nw0 + 6];
685                 w[nw1 + 2] = 0.5 / wk1r;
686                 w[nw1 + 3] = 0.5 / wk3r;
687                 for (j = 4; j < nwh; j += 4) {
688                     wk1r = w[nw0 + 2 * j];
689                     wk1i = w[nw0 + 2 * j + 1];
690                     wk3r = w[nw0 + 2 * j + 2];
691                     wk3i = w[nw0 + 2 * j + 3];
692                     w[nw1 + j] = wk1r;
693                     w[nw1 + j + 1] = wk1i;
694                     w[nw1 + j + 2] = wk3r;
695                     w[nw1 + j + 3] = wk3i;
696                 }
697             }
698             nw0 = nw1;
699         }
700     }
701 }
702 
703 
makeipt(int nw,int * ip)704 void makeipt(int nw, int *ip)
705 {
706     int j, l, m, m2, p, q;
707 
708     ip[2] = 0;
709     ip[3] = 16;
710     m = 2;
711     for (l = nw; l > 32; l >>= 2) {
712         m2 = m << 1;
713         q = m2 << 3;
714         for (j = m; j < m2; j++) {
715             p = ip[j] << 2;
716             ip[m + j] = p;
717             ip[m2 + j] = p + q;
718         }
719         m = m2;
720     }
721 }
722 
723 
makect(int nc,int * ip,double * c)724 void makect(int nc, int *ip, double *c)
725 {
726     int j, nch;
727     double delta;
728 
729     ip[1] = nc;
730     if (nc > 1) {
731         nch = nc >> 1;
732         delta = atan(1.0) / nch;
733         c[0] = cos(delta * nch);
734         c[nch] = 0.5 * c[0];
735         for (j = 1; j < nch; j++) {
736             c[j] = 0.5 * cos(delta * j);
737             c[nc - j] = 0.5 * sin(delta * j);
738         }
739     }
740 }
741 
742 
743 /* -------- child routines -------- */
744 
745 
746 #ifdef USE_CDFT_PTHREADS
747 #define USE_CDFT_THREADS
748 #ifndef CDFT_THREADS_BEGIN_N
749 #define CDFT_THREADS_BEGIN_N 8192
750 #endif
751 #ifndef CDFT_4THREADS_BEGIN_N
752 #define CDFT_4THREADS_BEGIN_N 65536
753 #endif
754 #include <pthread.h>
755 #include <stdio.h>
756 #include <stdlib.h>
757 #define cdft_thread_t pthread_t
758 #define cdft_thread_create(thp,func,argp) { \
759     if (pthread_create(thp, NULL, func, (void *) argp) != 0) { \
760         fprintf(stderr, "cdft thread error\n"); \
761         exit(1); \
762     } \
763 }
764 #define cdft_thread_wait(th) { \
765     if (pthread_join(th, NULL) != 0) { \
766         fprintf(stderr, "cdft thread error\n"); \
767         exit(1); \
768     } \
769 }
770 #endif /* USE_CDFT_PTHREADS */
771 
772 
773 #ifdef USE_CDFT_WINTHREADS
774 #define USE_CDFT_THREADS
775 #ifndef CDFT_THREADS_BEGIN_N
776 #define CDFT_THREADS_BEGIN_N 32768
777 #endif
778 #ifndef CDFT_4THREADS_BEGIN_N
779 #define CDFT_4THREADS_BEGIN_N 524288
780 #endif
781 #include <windows.h>
782 #include <stdio.h>
783 #include <stdlib.h>
784 #define cdft_thread_t HANDLE
785 #define cdft_thread_create(thp,func,argp) { \
786     DWORD thid; \
787     *(thp) = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE) func, (LPVOID) argp, 0, &thid); \
788     if (*(thp) == 0) { \
789         fprintf(stderr, "cdft thread error\n"); \
790         exit(1); \
791     } \
792 }
793 #define cdft_thread_wait(th) { \
794     WaitForSingleObject(th, INFINITE); \
795     CloseHandle(th); \
796 }
797 #endif /* USE_CDFT_WINTHREADS */
798 
799 
cftfsub(int n,double * a,int * ip,int nw,double * w)800 void cftfsub(int n, double *a, int *ip, int nw, double *w)
801 {
802     void bitrv2(int n, int *ip, double *a);
803     void bitrv216(double *a);
804     void bitrv208(double *a);
805     void cftf1st(int n, double *a, double *w);
806     void cftrec4(int n, double *a, int nw, double *w);
807     void cftleaf(int n, int isplt, double *a, int nw, double *w);
808     void cftfx41(int n, double *a, int nw, double *w);
809     void cftf161(double *a, double *w);
810     void cftf081(double *a, double *w);
811     void cftf040(double *a);
812     void cftx020(double *a);
813 #ifdef USE_CDFT_THREADS
814     void cftrec4_th(int n, double *a, int nw, double *w);
815 #endif /* USE_CDFT_THREADS */
816 
817     if (n > 8) {
818         if (n > 32) {
819             cftf1st(n, a, &w[nw - (n >> 2)]);
820 #ifdef USE_CDFT_THREADS
821             if (n > CDFT_THREADS_BEGIN_N) {
822                 cftrec4_th(n, a, nw, w);
823             } else
824 #endif /* USE_CDFT_THREADS */
825             if (n > 512) {
826                 cftrec4(n, a, nw, w);
827             } else if (n > 128) {
828                 cftleaf(n, 1, a, nw, w);
829             } else {
830                 cftfx41(n, a, nw, w);
831             }
832             bitrv2(n, ip, a);
833         } else if (n == 32) {
834             cftf161(a, &w[nw - 8]);
835             bitrv216(a);
836         } else {
837             cftf081(a, w);
838             bitrv208(a);
839         }
840     } else if (n == 8) {
841         cftf040(a);
842     } else if (n == 4) {
843         cftx020(a);
844     }
845 }
846 
847 
cftbsub(int n,double * a,int * ip,int nw,double * w)848 void cftbsub(int n, double *a, int *ip, int nw, double *w)
849 {
850     void bitrv2conj(int n, int *ip, double *a);
851     void bitrv216neg(double *a);
852     void bitrv208neg(double *a);
853     void cftb1st(int n, double *a, double *w);
854     void cftrec4(int n, double *a, int nw, double *w);
855     void cftleaf(int n, int isplt, double *a, int nw, double *w);
856     void cftfx41(int n, double *a, int nw, double *w);
857     void cftf161(double *a, double *w);
858     void cftf081(double *a, double *w);
859     void cftb040(double *a);
860     void cftx020(double *a);
861 #ifdef USE_CDFT_THREADS
862     void cftrec4_th(int n, double *a, int nw, double *w);
863 #endif /* USE_CDFT_THREADS */
864 
865     if (n > 8) {
866         if (n > 32) {
867             cftb1st(n, a, &w[nw - (n >> 2)]);
868 #ifdef USE_CDFT_THREADS
869             if (n > CDFT_THREADS_BEGIN_N) {
870                 cftrec4_th(n, a, nw, w);
871             } else
872 #endif /* USE_CDFT_THREADS */
873             if (n > 512) {
874                 cftrec4(n, a, nw, w);
875             } else if (n > 128) {
876                 cftleaf(n, 1, a, nw, w);
877             } else {
878                 cftfx41(n, a, nw, w);
879             }
880             bitrv2conj(n, ip, a);
881         } else if (n == 32) {
882             cftf161(a, &w[nw - 8]);
883             bitrv216neg(a);
884         } else {
885             cftf081(a, w);
886             bitrv208neg(a);
887         }
888     } else if (n == 8) {
889         cftb040(a);
890     } else if (n == 4) {
891         cftx020(a);
892     }
893 }
894 
895 
bitrv2(int n,int * ip,double * a)896 void bitrv2(int n, int *ip, double *a)
897 {
898     int j, j1, k, k1, l, m, nh, nm;
899     double xr, xi, yr, yi;
900 
901     m = 1;
902     for (l = n >> 2; l > 8; l >>= 2) {
903         m <<= 1;
904     }
905     nh = n >> 1;
906     nm = 4 * m;
907     if (l == 8) {
908         for (k = 0; k < m; k++) {
909             for (j = 0; j < k; j++) {
910                 j1 = 4 * j + 2 * ip[m + k];
911                 k1 = 4 * k + 2 * ip[m + j];
912                 xr = a[j1];
913                 xi = a[j1 + 1];
914                 yr = a[k1];
915                 yi = a[k1 + 1];
916                 a[j1] = yr;
917                 a[j1 + 1] = yi;
918                 a[k1] = xr;
919                 a[k1 + 1] = xi;
920                 j1 += nm;
921                 k1 += 2 * nm;
922                 xr = a[j1];
923                 xi = a[j1 + 1];
924                 yr = a[k1];
925                 yi = a[k1 + 1];
926                 a[j1] = yr;
927                 a[j1 + 1] = yi;
928                 a[k1] = xr;
929                 a[k1 + 1] = xi;
930                 j1 += nm;
931                 k1 -= nm;
932                 xr = a[j1];
933                 xi = a[j1 + 1];
934                 yr = a[k1];
935                 yi = a[k1 + 1];
936                 a[j1] = yr;
937                 a[j1 + 1] = yi;
938                 a[k1] = xr;
939                 a[k1 + 1] = xi;
940                 j1 += nm;
941                 k1 += 2 * nm;
942                 xr = a[j1];
943                 xi = a[j1 + 1];
944                 yr = a[k1];
945                 yi = a[k1 + 1];
946                 a[j1] = yr;
947                 a[j1 + 1] = yi;
948                 a[k1] = xr;
949                 a[k1 + 1] = xi;
950                 j1 += nh;
951                 k1 += 2;
952                 xr = a[j1];
953                 xi = a[j1 + 1];
954                 yr = a[k1];
955                 yi = a[k1 + 1];
956                 a[j1] = yr;
957                 a[j1 + 1] = yi;
958                 a[k1] = xr;
959                 a[k1 + 1] = xi;
960                 j1 -= nm;
961                 k1 -= 2 * nm;
962                 xr = a[j1];
963                 xi = a[j1 + 1];
964                 yr = a[k1];
965                 yi = a[k1 + 1];
966                 a[j1] = yr;
967                 a[j1 + 1] = yi;
968                 a[k1] = xr;
969                 a[k1 + 1] = xi;
970                 j1 -= nm;
971                 k1 += nm;
972                 xr = a[j1];
973                 xi = a[j1 + 1];
974                 yr = a[k1];
975                 yi = a[k1 + 1];
976                 a[j1] = yr;
977                 a[j1 + 1] = yi;
978                 a[k1] = xr;
979                 a[k1 + 1] = xi;
980                 j1 -= nm;
981                 k1 -= 2 * nm;
982                 xr = a[j1];
983                 xi = a[j1 + 1];
984                 yr = a[k1];
985                 yi = a[k1 + 1];
986                 a[j1] = yr;
987                 a[j1 + 1] = yi;
988                 a[k1] = xr;
989                 a[k1 + 1] = xi;
990                 j1 += 2;
991                 k1 += nh;
992                 xr = a[j1];
993                 xi = a[j1 + 1];
994                 yr = a[k1];
995                 yi = a[k1 + 1];
996                 a[j1] = yr;
997                 a[j1 + 1] = yi;
998                 a[k1] = xr;
999                 a[k1 + 1] = xi;
1000                 j1 += nm;
1001                 k1 += 2 * nm;
1002                 xr = a[j1];
1003                 xi = a[j1 + 1];
1004                 yr = a[k1];
1005                 yi = a[k1 + 1];
1006                 a[j1] = yr;
1007                 a[j1 + 1] = yi;
1008                 a[k1] = xr;
1009                 a[k1 + 1] = xi;
1010                 j1 += nm;
1011                 k1 -= nm;
1012                 xr = a[j1];
1013                 xi = a[j1 + 1];
1014                 yr = a[k1];
1015                 yi = a[k1 + 1];
1016                 a[j1] = yr;
1017                 a[j1 + 1] = yi;
1018                 a[k1] = xr;
1019                 a[k1 + 1] = xi;
1020                 j1 += nm;
1021                 k1 += 2 * nm;
1022                 xr = a[j1];
1023                 xi = a[j1 + 1];
1024                 yr = a[k1];
1025                 yi = a[k1 + 1];
1026                 a[j1] = yr;
1027                 a[j1 + 1] = yi;
1028                 a[k1] = xr;
1029                 a[k1 + 1] = xi;
1030                 j1 -= nh;
1031                 k1 -= 2;
1032                 xr = a[j1];
1033                 xi = a[j1 + 1];
1034                 yr = a[k1];
1035                 yi = a[k1 + 1];
1036                 a[j1] = yr;
1037                 a[j1 + 1] = yi;
1038                 a[k1] = xr;
1039                 a[k1 + 1] = xi;
1040                 j1 -= nm;
1041                 k1 -= 2 * nm;
1042                 xr = a[j1];
1043                 xi = a[j1 + 1];
1044                 yr = a[k1];
1045                 yi = a[k1 + 1];
1046                 a[j1] = yr;
1047                 a[j1 + 1] = yi;
1048                 a[k1] = xr;
1049                 a[k1 + 1] = xi;
1050                 j1 -= nm;
1051                 k1 += nm;
1052                 xr = a[j1];
1053                 xi = a[j1 + 1];
1054                 yr = a[k1];
1055                 yi = a[k1 + 1];
1056                 a[j1] = yr;
1057                 a[j1 + 1] = yi;
1058                 a[k1] = xr;
1059                 a[k1 + 1] = xi;
1060                 j1 -= nm;
1061                 k1 -= 2 * nm;
1062                 xr = a[j1];
1063                 xi = a[j1 + 1];
1064                 yr = a[k1];
1065                 yi = a[k1 + 1];
1066                 a[j1] = yr;
1067                 a[j1 + 1] = yi;
1068                 a[k1] = xr;
1069                 a[k1 + 1] = xi;
1070             }
1071             k1 = 4 * k + 2 * ip[m + k];
1072             j1 = k1 + 2;
1073             k1 += nh;
1074             xr = a[j1];
1075             xi = a[j1 + 1];
1076             yr = a[k1];
1077             yi = a[k1 + 1];
1078             a[j1] = yr;
1079             a[j1 + 1] = yi;
1080             a[k1] = xr;
1081             a[k1 + 1] = xi;
1082             j1 += nm;
1083             k1 += 2 * nm;
1084             xr = a[j1];
1085             xi = a[j1 + 1];
1086             yr = a[k1];
1087             yi = a[k1 + 1];
1088             a[j1] = yr;
1089             a[j1 + 1] = yi;
1090             a[k1] = xr;
1091             a[k1 + 1] = xi;
1092             j1 += nm;
1093             k1 -= nm;
1094             xr = a[j1];
1095             xi = a[j1 + 1];
1096             yr = a[k1];
1097             yi = a[k1 + 1];
1098             a[j1] = yr;
1099             a[j1 + 1] = yi;
1100             a[k1] = xr;
1101             a[k1 + 1] = xi;
1102             j1 -= 2;
1103             k1 -= nh;
1104             xr = a[j1];
1105             xi = a[j1 + 1];
1106             yr = a[k1];
1107             yi = a[k1 + 1];
1108             a[j1] = yr;
1109             a[j1 + 1] = yi;
1110             a[k1] = xr;
1111             a[k1 + 1] = xi;
1112             j1 += nh + 2;
1113             k1 += nh + 2;
1114             xr = a[j1];
1115             xi = a[j1 + 1];
1116             yr = a[k1];
1117             yi = a[k1 + 1];
1118             a[j1] = yr;
1119             a[j1 + 1] = yi;
1120             a[k1] = xr;
1121             a[k1 + 1] = xi;
1122             j1 -= nh - nm;
1123             k1 += 2 * nm - 2;
1124             xr = a[j1];
1125             xi = a[j1 + 1];
1126             yr = a[k1];
1127             yi = a[k1 + 1];
1128             a[j1] = yr;
1129             a[j1 + 1] = yi;
1130             a[k1] = xr;
1131             a[k1 + 1] = xi;
1132         }
1133     } else {
1134         for (k = 0; k < m; k++) {
1135             for (j = 0; j < k; j++) {
1136                 j1 = 4 * j + ip[m + k];
1137                 k1 = 4 * k + ip[m + j];
1138                 xr = a[j1];
1139                 xi = a[j1 + 1];
1140                 yr = a[k1];
1141                 yi = a[k1 + 1];
1142                 a[j1] = yr;
1143                 a[j1 + 1] = yi;
1144                 a[k1] = xr;
1145                 a[k1 + 1] = xi;
1146                 j1 += nm;
1147                 k1 += nm;
1148                 xr = a[j1];
1149                 xi = a[j1 + 1];
1150                 yr = a[k1];
1151                 yi = a[k1 + 1];
1152                 a[j1] = yr;
1153                 a[j1 + 1] = yi;
1154                 a[k1] = xr;
1155                 a[k1 + 1] = xi;
1156                 j1 += nh;
1157                 k1 += 2;
1158                 xr = a[j1];
1159                 xi = a[j1 + 1];
1160                 yr = a[k1];
1161                 yi = a[k1 + 1];
1162                 a[j1] = yr;
1163                 a[j1 + 1] = yi;
1164                 a[k1] = xr;
1165                 a[k1 + 1] = xi;
1166                 j1 -= nm;
1167                 k1 -= nm;
1168                 xr = a[j1];
1169                 xi = a[j1 + 1];
1170                 yr = a[k1];
1171                 yi = a[k1 + 1];
1172                 a[j1] = yr;
1173                 a[j1 + 1] = yi;
1174                 a[k1] = xr;
1175                 a[k1 + 1] = xi;
1176                 j1 += 2;
1177                 k1 += nh;
1178                 xr = a[j1];
1179                 xi = a[j1 + 1];
1180                 yr = a[k1];
1181                 yi = a[k1 + 1];
1182                 a[j1] = yr;
1183                 a[j1 + 1] = yi;
1184                 a[k1] = xr;
1185                 a[k1 + 1] = xi;
1186                 j1 += nm;
1187                 k1 += nm;
1188                 xr = a[j1];
1189                 xi = a[j1 + 1];
1190                 yr = a[k1];
1191                 yi = a[k1 + 1];
1192                 a[j1] = yr;
1193                 a[j1 + 1] = yi;
1194                 a[k1] = xr;
1195                 a[k1 + 1] = xi;
1196                 j1 -= nh;
1197                 k1 -= 2;
1198                 xr = a[j1];
1199                 xi = a[j1 + 1];
1200                 yr = a[k1];
1201                 yi = a[k1 + 1];
1202                 a[j1] = yr;
1203                 a[j1 + 1] = yi;
1204                 a[k1] = xr;
1205                 a[k1 + 1] = xi;
1206                 j1 -= nm;
1207                 k1 -= nm;
1208                 xr = a[j1];
1209                 xi = a[j1 + 1];
1210                 yr = a[k1];
1211                 yi = a[k1 + 1];
1212                 a[j1] = yr;
1213                 a[j1 + 1] = yi;
1214                 a[k1] = xr;
1215                 a[k1 + 1] = xi;
1216             }
1217             k1 = 4 * k + ip[m + k];
1218             j1 = k1 + 2;
1219             k1 += nh;
1220             xr = a[j1];
1221             xi = a[j1 + 1];
1222             yr = a[k1];
1223             yi = a[k1 + 1];
1224             a[j1] = yr;
1225             a[j1 + 1] = yi;
1226             a[k1] = xr;
1227             a[k1 + 1] = xi;
1228             j1 += nm;
1229             k1 += nm;
1230             xr = a[j1];
1231             xi = a[j1 + 1];
1232             yr = a[k1];
1233             yi = a[k1 + 1];
1234             a[j1] = yr;
1235             a[j1 + 1] = yi;
1236             a[k1] = xr;
1237             a[k1 + 1] = xi;
1238         }
1239     }
1240 }
1241 
1242 
bitrv2conj(int n,int * ip,double * a)1243 void bitrv2conj(int n, int *ip, double *a)
1244 {
1245     int j, j1, k, k1, l, m, nh, nm;
1246     double xr, xi, yr, yi;
1247 
1248     m = 1;
1249     for (l = n >> 2; l > 8; l >>= 2) {
1250         m <<= 1;
1251     }
1252     nh = n >> 1;
1253     nm = 4 * m;
1254     if (l == 8) {
1255         for (k = 0; k < m; k++) {
1256             for (j = 0; j < k; j++) {
1257                 j1 = 4 * j + 2 * ip[m + k];
1258                 k1 = 4 * k + 2 * ip[m + j];
1259                 xr = a[j1];
1260                 xi = -a[j1 + 1];
1261                 yr = a[k1];
1262                 yi = -a[k1 + 1];
1263                 a[j1] = yr;
1264                 a[j1 + 1] = yi;
1265                 a[k1] = xr;
1266                 a[k1 + 1] = xi;
1267                 j1 += nm;
1268                 k1 += 2 * nm;
1269                 xr = a[j1];
1270                 xi = -a[j1 + 1];
1271                 yr = a[k1];
1272                 yi = -a[k1 + 1];
1273                 a[j1] = yr;
1274                 a[j1 + 1] = yi;
1275                 a[k1] = xr;
1276                 a[k1 + 1] = xi;
1277                 j1 += nm;
1278                 k1 -= nm;
1279                 xr = a[j1];
1280                 xi = -a[j1 + 1];
1281                 yr = a[k1];
1282                 yi = -a[k1 + 1];
1283                 a[j1] = yr;
1284                 a[j1 + 1] = yi;
1285                 a[k1] = xr;
1286                 a[k1 + 1] = xi;
1287                 j1 += nm;
1288                 k1 += 2 * nm;
1289                 xr = a[j1];
1290                 xi = -a[j1 + 1];
1291                 yr = a[k1];
1292                 yi = -a[k1 + 1];
1293                 a[j1] = yr;
1294                 a[j1 + 1] = yi;
1295                 a[k1] = xr;
1296                 a[k1 + 1] = xi;
1297                 j1 += nh;
1298                 k1 += 2;
1299                 xr = a[j1];
1300                 xi = -a[j1 + 1];
1301                 yr = a[k1];
1302                 yi = -a[k1 + 1];
1303                 a[j1] = yr;
1304                 a[j1 + 1] = yi;
1305                 a[k1] = xr;
1306                 a[k1 + 1] = xi;
1307                 j1 -= nm;
1308                 k1 -= 2 * nm;
1309                 xr = a[j1];
1310                 xi = -a[j1 + 1];
1311                 yr = a[k1];
1312                 yi = -a[k1 + 1];
1313                 a[j1] = yr;
1314                 a[j1 + 1] = yi;
1315                 a[k1] = xr;
1316                 a[k1 + 1] = xi;
1317                 j1 -= nm;
1318                 k1 += nm;
1319                 xr = a[j1];
1320                 xi = -a[j1 + 1];
1321                 yr = a[k1];
1322                 yi = -a[k1 + 1];
1323                 a[j1] = yr;
1324                 a[j1 + 1] = yi;
1325                 a[k1] = xr;
1326                 a[k1 + 1] = xi;
1327                 j1 -= nm;
1328                 k1 -= 2 * nm;
1329                 xr = a[j1];
1330                 xi = -a[j1 + 1];
1331                 yr = a[k1];
1332                 yi = -a[k1 + 1];
1333                 a[j1] = yr;
1334                 a[j1 + 1] = yi;
1335                 a[k1] = xr;
1336                 a[k1 + 1] = xi;
1337                 j1 += 2;
1338                 k1 += nh;
1339                 xr = a[j1];
1340                 xi = -a[j1 + 1];
1341                 yr = a[k1];
1342                 yi = -a[k1 + 1];
1343                 a[j1] = yr;
1344                 a[j1 + 1] = yi;
1345                 a[k1] = xr;
1346                 a[k1 + 1] = xi;
1347                 j1 += nm;
1348                 k1 += 2 * nm;
1349                 xr = a[j1];
1350                 xi = -a[j1 + 1];
1351                 yr = a[k1];
1352                 yi = -a[k1 + 1];
1353                 a[j1] = yr;
1354                 a[j1 + 1] = yi;
1355                 a[k1] = xr;
1356                 a[k1 + 1] = xi;
1357                 j1 += nm;
1358                 k1 -= nm;
1359                 xr = a[j1];
1360                 xi = -a[j1 + 1];
1361                 yr = a[k1];
1362                 yi = -a[k1 + 1];
1363                 a[j1] = yr;
1364                 a[j1 + 1] = yi;
1365                 a[k1] = xr;
1366                 a[k1 + 1] = xi;
1367                 j1 += nm;
1368                 k1 += 2 * nm;
1369                 xr = a[j1];
1370                 xi = -a[j1 + 1];
1371                 yr = a[k1];
1372                 yi = -a[k1 + 1];
1373                 a[j1] = yr;
1374                 a[j1 + 1] = yi;
1375                 a[k1] = xr;
1376                 a[k1 + 1] = xi;
1377                 j1 -= nh;
1378                 k1 -= 2;
1379                 xr = a[j1];
1380                 xi = -a[j1 + 1];
1381                 yr = a[k1];
1382                 yi = -a[k1 + 1];
1383                 a[j1] = yr;
1384                 a[j1 + 1] = yi;
1385                 a[k1] = xr;
1386                 a[k1 + 1] = xi;
1387                 j1 -= nm;
1388                 k1 -= 2 * nm;
1389                 xr = a[j1];
1390                 xi = -a[j1 + 1];
1391                 yr = a[k1];
1392                 yi = -a[k1 + 1];
1393                 a[j1] = yr;
1394                 a[j1 + 1] = yi;
1395                 a[k1] = xr;
1396                 a[k1 + 1] = xi;
1397                 j1 -= nm;
1398                 k1 += nm;
1399                 xr = a[j1];
1400                 xi = -a[j1 + 1];
1401                 yr = a[k1];
1402                 yi = -a[k1 + 1];
1403                 a[j1] = yr;
1404                 a[j1 + 1] = yi;
1405                 a[k1] = xr;
1406                 a[k1 + 1] = xi;
1407                 j1 -= nm;
1408                 k1 -= 2 * nm;
1409                 xr = a[j1];
1410                 xi = -a[j1 + 1];
1411                 yr = a[k1];
1412                 yi = -a[k1 + 1];
1413                 a[j1] = yr;
1414                 a[j1 + 1] = yi;
1415                 a[k1] = xr;
1416                 a[k1 + 1] = xi;
1417             }
1418             k1 = 4 * k + 2 * ip[m + k];
1419             j1 = k1 + 2;
1420             k1 += nh;
1421             a[j1 - 1] = -a[j1 - 1];
1422             xr = a[j1];
1423             xi = -a[j1 + 1];
1424             yr = a[k1];
1425             yi = -a[k1 + 1];
1426             a[j1] = yr;
1427             a[j1 + 1] = yi;
1428             a[k1] = xr;
1429             a[k1 + 1] = xi;
1430             a[k1 + 3] = -a[k1 + 3];
1431             j1 += nm;
1432             k1 += 2 * nm;
1433             xr = a[j1];
1434             xi = -a[j1 + 1];
1435             yr = a[k1];
1436             yi = -a[k1 + 1];
1437             a[j1] = yr;
1438             a[j1 + 1] = yi;
1439             a[k1] = xr;
1440             a[k1 + 1] = xi;
1441             j1 += nm;
1442             k1 -= nm;
1443             xr = a[j1];
1444             xi = -a[j1 + 1];
1445             yr = a[k1];
1446             yi = -a[k1 + 1];
1447             a[j1] = yr;
1448             a[j1 + 1] = yi;
1449             a[k1] = xr;
1450             a[k1 + 1] = xi;
1451             j1 -= 2;
1452             k1 -= nh;
1453             xr = a[j1];
1454             xi = -a[j1 + 1];
1455             yr = a[k1];
1456             yi = -a[k1 + 1];
1457             a[j1] = yr;
1458             a[j1 + 1] = yi;
1459             a[k1] = xr;
1460             a[k1 + 1] = xi;
1461             j1 += nh + 2;
1462             k1 += nh + 2;
1463             xr = a[j1];
1464             xi = -a[j1 + 1];
1465             yr = a[k1];
1466             yi = -a[k1 + 1];
1467             a[j1] = yr;
1468             a[j1 + 1] = yi;
1469             a[k1] = xr;
1470             a[k1 + 1] = xi;
1471             j1 -= nh - nm;
1472             k1 += 2 * nm - 2;
1473             a[j1 - 1] = -a[j1 - 1];
1474             xr = a[j1];
1475             xi = -a[j1 + 1];
1476             yr = a[k1];
1477             yi = -a[k1 + 1];
1478             a[j1] = yr;
1479             a[j1 + 1] = yi;
1480             a[k1] = xr;
1481             a[k1 + 1] = xi;
1482             a[k1 + 3] = -a[k1 + 3];
1483         }
1484     } else {
1485         for (k = 0; k < m; k++) {
1486             for (j = 0; j < k; j++) {
1487                 j1 = 4 * j + ip[m + k];
1488                 k1 = 4 * k + ip[m + j];
1489                 xr = a[j1];
1490                 xi = -a[j1 + 1];
1491                 yr = a[k1];
1492                 yi = -a[k1 + 1];
1493                 a[j1] = yr;
1494                 a[j1 + 1] = yi;
1495                 a[k1] = xr;
1496                 a[k1 + 1] = xi;
1497                 j1 += nm;
1498                 k1 += nm;
1499                 xr = a[j1];
1500                 xi = -a[j1 + 1];
1501                 yr = a[k1];
1502                 yi = -a[k1 + 1];
1503                 a[j1] = yr;
1504                 a[j1 + 1] = yi;
1505                 a[k1] = xr;
1506                 a[k1 + 1] = xi;
1507                 j1 += nh;
1508                 k1 += 2;
1509                 xr = a[j1];
1510                 xi = -a[j1 + 1];
1511                 yr = a[k1];
1512                 yi = -a[k1 + 1];
1513                 a[j1] = yr;
1514                 a[j1 + 1] = yi;
1515                 a[k1] = xr;
1516                 a[k1 + 1] = xi;
1517                 j1 -= nm;
1518                 k1 -= nm;
1519                 xr = a[j1];
1520                 xi = -a[j1 + 1];
1521                 yr = a[k1];
1522                 yi = -a[k1 + 1];
1523                 a[j1] = yr;
1524                 a[j1 + 1] = yi;
1525                 a[k1] = xr;
1526                 a[k1 + 1] = xi;
1527                 j1 += 2;
1528                 k1 += nh;
1529                 xr = a[j1];
1530                 xi = -a[j1 + 1];
1531                 yr = a[k1];
1532                 yi = -a[k1 + 1];
1533                 a[j1] = yr;
1534                 a[j1 + 1] = yi;
1535                 a[k1] = xr;
1536                 a[k1 + 1] = xi;
1537                 j1 += nm;
1538                 k1 += nm;
1539                 xr = a[j1];
1540                 xi = -a[j1 + 1];
1541                 yr = a[k1];
1542                 yi = -a[k1 + 1];
1543                 a[j1] = yr;
1544                 a[j1 + 1] = yi;
1545                 a[k1] = xr;
1546                 a[k1 + 1] = xi;
1547                 j1 -= nh;
1548                 k1 -= 2;
1549                 xr = a[j1];
1550                 xi = -a[j1 + 1];
1551                 yr = a[k1];
1552                 yi = -a[k1 + 1];
1553                 a[j1] = yr;
1554                 a[j1 + 1] = yi;
1555                 a[k1] = xr;
1556                 a[k1 + 1] = xi;
1557                 j1 -= nm;
1558                 k1 -= nm;
1559                 xr = a[j1];
1560                 xi = -a[j1 + 1];
1561                 yr = a[k1];
1562                 yi = -a[k1 + 1];
1563                 a[j1] = yr;
1564                 a[j1 + 1] = yi;
1565                 a[k1] = xr;
1566                 a[k1 + 1] = xi;
1567             }
1568             k1 = 4 * k + ip[m + k];
1569             j1 = k1 + 2;
1570             k1 += nh;
1571             a[j1 - 1] = -a[j1 - 1];
1572             xr = a[j1];
1573             xi = -a[j1 + 1];
1574             yr = a[k1];
1575             yi = -a[k1 + 1];
1576             a[j1] = yr;
1577             a[j1 + 1] = yi;
1578             a[k1] = xr;
1579             a[k1 + 1] = xi;
1580             a[k1 + 3] = -a[k1 + 3];
1581             j1 += nm;
1582             k1 += nm;
1583             a[j1 - 1] = -a[j1 - 1];
1584             xr = a[j1];
1585             xi = -a[j1 + 1];
1586             yr = a[k1];
1587             yi = -a[k1 + 1];
1588             a[j1] = yr;
1589             a[j1 + 1] = yi;
1590             a[k1] = xr;
1591             a[k1 + 1] = xi;
1592             a[k1 + 3] = -a[k1 + 3];
1593         }
1594     }
1595 }
1596 
1597 
bitrv216(double * a)1598 void bitrv216(double *a)
1599 {
1600     double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1601         x5r, x5i, x7r, x7i, x8r, x8i, x10r, x10i,
1602         x11r, x11i, x12r, x12i, x13r, x13i, x14r, x14i;
1603 
1604     x1r = a[2];
1605     x1i = a[3];
1606     x2r = a[4];
1607     x2i = a[5];
1608     x3r = a[6];
1609     x3i = a[7];
1610     x4r = a[8];
1611     x4i = a[9];
1612     x5r = a[10];
1613     x5i = a[11];
1614     x7r = a[14];
1615     x7i = a[15];
1616     x8r = a[16];
1617     x8i = a[17];
1618     x10r = a[20];
1619     x10i = a[21];
1620     x11r = a[22];
1621     x11i = a[23];
1622     x12r = a[24];
1623     x12i = a[25];
1624     x13r = a[26];
1625     x13i = a[27];
1626     x14r = a[28];
1627     x14i = a[29];
1628     a[2] = x8r;
1629     a[3] = x8i;
1630     a[4] = x4r;
1631     a[5] = x4i;
1632     a[6] = x12r;
1633     a[7] = x12i;
1634     a[8] = x2r;
1635     a[9] = x2i;
1636     a[10] = x10r;
1637     a[11] = x10i;
1638     a[14] = x14r;
1639     a[15] = x14i;
1640     a[16] = x1r;
1641     a[17] = x1i;
1642     a[20] = x5r;
1643     a[21] = x5i;
1644     a[22] = x13r;
1645     a[23] = x13i;
1646     a[24] = x3r;
1647     a[25] = x3i;
1648     a[26] = x11r;
1649     a[27] = x11i;
1650     a[28] = x7r;
1651     a[29] = x7i;
1652 }
1653 
1654 
bitrv216neg(double * a)1655 void bitrv216neg(double *a)
1656 {
1657     double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1658         x5r, x5i, x6r, x6i, x7r, x7i, x8r, x8i,
1659         x9r, x9i, x10r, x10i, x11r, x11i, x12r, x12i,
1660         x13r, x13i, x14r, x14i, x15r, x15i;
1661 
1662     x1r = a[2];
1663     x1i = a[3];
1664     x2r = a[4];
1665     x2i = a[5];
1666     x3r = a[6];
1667     x3i = a[7];
1668     x4r = a[8];
1669     x4i = a[9];
1670     x5r = a[10];
1671     x5i = a[11];
1672     x6r = a[12];
1673     x6i = a[13];
1674     x7r = a[14];
1675     x7i = a[15];
1676     x8r = a[16];
1677     x8i = a[17];
1678     x9r = a[18];
1679     x9i = a[19];
1680     x10r = a[20];
1681     x10i = a[21];
1682     x11r = a[22];
1683     x11i = a[23];
1684     x12r = a[24];
1685     x12i = a[25];
1686     x13r = a[26];
1687     x13i = a[27];
1688     x14r = a[28];
1689     x14i = a[29];
1690     x15r = a[30];
1691     x15i = a[31];
1692     a[2] = x15r;
1693     a[3] = x15i;
1694     a[4] = x7r;
1695     a[5] = x7i;
1696     a[6] = x11r;
1697     a[7] = x11i;
1698     a[8] = x3r;
1699     a[9] = x3i;
1700     a[10] = x13r;
1701     a[11] = x13i;
1702     a[12] = x5r;
1703     a[13] = x5i;
1704     a[14] = x9r;
1705     a[15] = x9i;
1706     a[16] = x1r;
1707     a[17] = x1i;
1708     a[18] = x14r;
1709     a[19] = x14i;
1710     a[20] = x6r;
1711     a[21] = x6i;
1712     a[22] = x10r;
1713     a[23] = x10i;
1714     a[24] = x2r;
1715     a[25] = x2i;
1716     a[26] = x12r;
1717     a[27] = x12i;
1718     a[28] = x4r;
1719     a[29] = x4i;
1720     a[30] = x8r;
1721     a[31] = x8i;
1722 }
1723 
1724 
bitrv208(double * a)1725 void bitrv208(double *a)
1726 {
1727     double x1r, x1i, x3r, x3i, x4r, x4i, x6r, x6i;
1728 
1729     x1r = a[2];
1730     x1i = a[3];
1731     x3r = a[6];
1732     x3i = a[7];
1733     x4r = a[8];
1734     x4i = a[9];
1735     x6r = a[12];
1736     x6i = a[13];
1737     a[2] = x4r;
1738     a[3] = x4i;
1739     a[6] = x6r;
1740     a[7] = x6i;
1741     a[8] = x1r;
1742     a[9] = x1i;
1743     a[12] = x3r;
1744     a[13] = x3i;
1745 }
1746 
1747 
bitrv208neg(double * a)1748 void bitrv208neg(double *a)
1749 {
1750     double x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i,
1751         x5r, x5i, x6r, x6i, x7r, x7i;
1752 
1753     x1r = a[2];
1754     x1i = a[3];
1755     x2r = a[4];
1756     x2i = a[5];
1757     x3r = a[6];
1758     x3i = a[7];
1759     x4r = a[8];
1760     x4i = a[9];
1761     x5r = a[10];
1762     x5i = a[11];
1763     x6r = a[12];
1764     x6i = a[13];
1765     x7r = a[14];
1766     x7i = a[15];
1767     a[2] = x7r;
1768     a[3] = x7i;
1769     a[4] = x3r;
1770     a[5] = x3i;
1771     a[6] = x5r;
1772     a[7] = x5i;
1773     a[8] = x1r;
1774     a[9] = x1i;
1775     a[10] = x6r;
1776     a[11] = x6i;
1777     a[12] = x2r;
1778     a[13] = x2i;
1779     a[14] = x4r;
1780     a[15] = x4i;
1781 }
1782 
1783 
cftf1st(int n,double * a,double * w)1784 void cftf1st(int n, double *a, double *w)
1785 {
1786     int j, j0, j1, j2, j3, k, m, mh;
1787     double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
1788         wd1r, wd1i, wd3r, wd3i;
1789     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
1790         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
1791 
1792     mh = n >> 3;
1793     m = 2 * mh;
1794     j1 = m;
1795     j2 = j1 + m;
1796     j3 = j2 + m;
1797     x0r = a[0] + a[j2];
1798     x0i = a[1] + a[j2 + 1];
1799     x1r = a[0] - a[j2];
1800     x1i = a[1] - a[j2 + 1];
1801     x2r = a[j1] + a[j3];
1802     x2i = a[j1 + 1] + a[j3 + 1];
1803     x3r = a[j1] - a[j3];
1804     x3i = a[j1 + 1] - a[j3 + 1];
1805     a[0] = x0r + x2r;
1806     a[1] = x0i + x2i;
1807     a[j1] = x0r - x2r;
1808     a[j1 + 1] = x0i - x2i;
1809     a[j2] = x1r - x3i;
1810     a[j2 + 1] = x1i + x3r;
1811     a[j3] = x1r + x3i;
1812     a[j3 + 1] = x1i - x3r;
1813     wn4r = w[1];
1814     csc1 = w[2];
1815     csc3 = w[3];
1816     wd1r = 1;
1817     wd1i = 0;
1818     wd3r = 1;
1819     wd3i = 0;
1820     k = 0;
1821     for (j = 2; j < mh - 2; j += 4) {
1822         k += 4;
1823         wk1r = csc1 * (wd1r + w[k]);
1824         wk1i = csc1 * (wd1i + w[k + 1]);
1825         wk3r = csc3 * (wd3r + w[k + 2]);
1826         wk3i = csc3 * (wd3i + w[k + 3]);
1827         wd1r = w[k];
1828         wd1i = w[k + 1];
1829         wd3r = w[k + 2];
1830         wd3i = w[k + 3];
1831         j1 = j + m;
1832         j2 = j1 + m;
1833         j3 = j2 + m;
1834         x0r = a[j] + a[j2];
1835         x0i = a[j + 1] + a[j2 + 1];
1836         x1r = a[j] - a[j2];
1837         x1i = a[j + 1] - a[j2 + 1];
1838         y0r = a[j + 2] + a[j2 + 2];
1839         y0i = a[j + 3] + a[j2 + 3];
1840         y1r = a[j + 2] - a[j2 + 2];
1841         y1i = a[j + 3] - a[j2 + 3];
1842         x2r = a[j1] + a[j3];
1843         x2i = a[j1 + 1] + a[j3 + 1];
1844         x3r = a[j1] - a[j3];
1845         x3i = a[j1 + 1] - a[j3 + 1];
1846         y2r = a[j1 + 2] + a[j3 + 2];
1847         y2i = a[j1 + 3] + a[j3 + 3];
1848         y3r = a[j1 + 2] - a[j3 + 2];
1849         y3i = a[j1 + 3] - a[j3 + 3];
1850         a[j] = x0r + x2r;
1851         a[j + 1] = x0i + x2i;
1852         a[j + 2] = y0r + y2r;
1853         a[j + 3] = y0i + y2i;
1854         a[j1] = x0r - x2r;
1855         a[j1 + 1] = x0i - x2i;
1856         a[j1 + 2] = y0r - y2r;
1857         a[j1 + 3] = y0i - y2i;
1858         x0r = x1r - x3i;
1859         x0i = x1i + x3r;
1860         a[j2] = wk1r * x0r - wk1i * x0i;
1861         a[j2 + 1] = wk1r * x0i + wk1i * x0r;
1862         x0r = y1r - y3i;
1863         x0i = y1i + y3r;
1864         a[j2 + 2] = wd1r * x0r - wd1i * x0i;
1865         a[j2 + 3] = wd1r * x0i + wd1i * x0r;
1866         x0r = x1r + x3i;
1867         x0i = x1i - x3r;
1868         a[j3] = wk3r * x0r + wk3i * x0i;
1869         a[j3 + 1] = wk3r * x0i - wk3i * x0r;
1870         x0r = y1r + y3i;
1871         x0i = y1i - y3r;
1872         a[j3 + 2] = wd3r * x0r + wd3i * x0i;
1873         a[j3 + 3] = wd3r * x0i - wd3i * x0r;
1874         j0 = m - j;
1875         j1 = j0 + m;
1876         j2 = j1 + m;
1877         j3 = j2 + m;
1878         x0r = a[j0] + a[j2];
1879         x0i = a[j0 + 1] + a[j2 + 1];
1880         x1r = a[j0] - a[j2];
1881         x1i = a[j0 + 1] - a[j2 + 1];
1882         y0r = a[j0 - 2] + a[j2 - 2];
1883         y0i = a[j0 - 1] + a[j2 - 1];
1884         y1r = a[j0 - 2] - a[j2 - 2];
1885         y1i = a[j0 - 1] - a[j2 - 1];
1886         x2r = a[j1] + a[j3];
1887         x2i = a[j1 + 1] + a[j3 + 1];
1888         x3r = a[j1] - a[j3];
1889         x3i = a[j1 + 1] - a[j3 + 1];
1890         y2r = a[j1 - 2] + a[j3 - 2];
1891         y2i = a[j1 - 1] + a[j3 - 1];
1892         y3r = a[j1 - 2] - a[j3 - 2];
1893         y3i = a[j1 - 1] - a[j3 - 1];
1894         a[j0] = x0r + x2r;
1895         a[j0 + 1] = x0i + x2i;
1896         a[j0 - 2] = y0r + y2r;
1897         a[j0 - 1] = y0i + y2i;
1898         a[j1] = x0r - x2r;
1899         a[j1 + 1] = x0i - x2i;
1900         a[j1 - 2] = y0r - y2r;
1901         a[j1 - 1] = y0i - y2i;
1902         x0r = x1r - x3i;
1903         x0i = x1i + x3r;
1904         a[j2] = wk1i * x0r - wk1r * x0i;
1905         a[j2 + 1] = wk1i * x0i + wk1r * x0r;
1906         x0r = y1r - y3i;
1907         x0i = y1i + y3r;
1908         a[j2 - 2] = wd1i * x0r - wd1r * x0i;
1909         a[j2 - 1] = wd1i * x0i + wd1r * x0r;
1910         x0r = x1r + x3i;
1911         x0i = x1i - x3r;
1912         a[j3] = wk3i * x0r + wk3r * x0i;
1913         a[j3 + 1] = wk3i * x0i - wk3r * x0r;
1914         x0r = y1r + y3i;
1915         x0i = y1i - y3r;
1916         a[j3 - 2] = wd3i * x0r + wd3r * x0i;
1917         a[j3 - 1] = wd3i * x0i - wd3r * x0r;
1918     }
1919     wk1r = csc1 * (wd1r + wn4r);
1920     wk1i = csc1 * (wd1i + wn4r);
1921     wk3r = csc3 * (wd3r - wn4r);
1922     wk3i = csc3 * (wd3i - wn4r);
1923     j0 = mh;
1924     j1 = j0 + m;
1925     j2 = j1 + m;
1926     j3 = j2 + m;
1927     x0r = a[j0 - 2] + a[j2 - 2];
1928     x0i = a[j0 - 1] + a[j2 - 1];
1929     x1r = a[j0 - 2] - a[j2 - 2];
1930     x1i = a[j0 - 1] - a[j2 - 1];
1931     x2r = a[j1 - 2] + a[j3 - 2];
1932     x2i = a[j1 - 1] + a[j3 - 1];
1933     x3r = a[j1 - 2] - a[j3 - 2];
1934     x3i = a[j1 - 1] - a[j3 - 1];
1935     a[j0 - 2] = x0r + x2r;
1936     a[j0 - 1] = x0i + x2i;
1937     a[j1 - 2] = x0r - x2r;
1938     a[j1 - 1] = x0i - x2i;
1939     x0r = x1r - x3i;
1940     x0i = x1i + x3r;
1941     a[j2 - 2] = wk1r * x0r - wk1i * x0i;
1942     a[j2 - 1] = wk1r * x0i + wk1i * x0r;
1943     x0r = x1r + x3i;
1944     x0i = x1i - x3r;
1945     a[j3 - 2] = wk3r * x0r + wk3i * x0i;
1946     a[j3 - 1] = wk3r * x0i - wk3i * x0r;
1947     x0r = a[j0] + a[j2];
1948     x0i = a[j0 + 1] + a[j2 + 1];
1949     x1r = a[j0] - a[j2];
1950     x1i = a[j0 + 1] - a[j2 + 1];
1951     x2r = a[j1] + a[j3];
1952     x2i = a[j1 + 1] + a[j3 + 1];
1953     x3r = a[j1] - a[j3];
1954     x3i = a[j1 + 1] - a[j3 + 1];
1955     a[j0] = x0r + x2r;
1956     a[j0 + 1] = x0i + x2i;
1957     a[j1] = x0r - x2r;
1958     a[j1 + 1] = x0i - x2i;
1959     x0r = x1r - x3i;
1960     x0i = x1i + x3r;
1961     a[j2] = wn4r * (x0r - x0i);
1962     a[j2 + 1] = wn4r * (x0i + x0r);
1963     x0r = x1r + x3i;
1964     x0i = x1i - x3r;
1965     a[j3] = -wn4r * (x0r + x0i);
1966     a[j3 + 1] = -wn4r * (x0i - x0r);
1967     x0r = a[j0 + 2] + a[j2 + 2];
1968     x0i = a[j0 + 3] + a[j2 + 3];
1969     x1r = a[j0 + 2] - a[j2 + 2];
1970     x1i = a[j0 + 3] - a[j2 + 3];
1971     x2r = a[j1 + 2] + a[j3 + 2];
1972     x2i = a[j1 + 3] + a[j3 + 3];
1973     x3r = a[j1 + 2] - a[j3 + 2];
1974     x3i = a[j1 + 3] - a[j3 + 3];
1975     a[j0 + 2] = x0r + x2r;
1976     a[j0 + 3] = x0i + x2i;
1977     a[j1 + 2] = x0r - x2r;
1978     a[j1 + 3] = x0i - x2i;
1979     x0r = x1r - x3i;
1980     x0i = x1i + x3r;
1981     a[j2 + 2] = wk1i * x0r - wk1r * x0i;
1982     a[j2 + 3] = wk1i * x0i + wk1r * x0r;
1983     x0r = x1r + x3i;
1984     x0i = x1i - x3r;
1985     a[j3 + 2] = wk3i * x0r + wk3r * x0i;
1986     a[j3 + 3] = wk3i * x0i - wk3r * x0r;
1987 }
1988 
1989 
cftb1st(int n,double * a,double * w)1990 void cftb1st(int n, double *a, double *w)
1991 {
1992     int j, j0, j1, j2, j3, k, m, mh;
1993     double wn4r, csc1, csc3, wk1r, wk1i, wk3r, wk3i,
1994         wd1r, wd1i, wd3r, wd3i;
1995     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
1996         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i;
1997 
1998     mh = n >> 3;
1999     m = 2 * mh;
2000     j1 = m;
2001     j2 = j1 + m;
2002     j3 = j2 + m;
2003     x0r = a[0] + a[j2];
2004     x0i = -a[1] - a[j2 + 1];
2005     x1r = a[0] - a[j2];
2006     x1i = -a[1] + a[j2 + 1];
2007     x2r = a[j1] + a[j3];
2008     x2i = a[j1 + 1] + a[j3 + 1];
2009     x3r = a[j1] - a[j3];
2010     x3i = a[j1 + 1] - a[j3 + 1];
2011     a[0] = x0r + x2r;
2012     a[1] = x0i - x2i;
2013     a[j1] = x0r - x2r;
2014     a[j1 + 1] = x0i + x2i;
2015     a[j2] = x1r + x3i;
2016     a[j2 + 1] = x1i + x3r;
2017     a[j3] = x1r - x3i;
2018     a[j3 + 1] = x1i - x3r;
2019     wn4r = w[1];
2020     csc1 = w[2];
2021     csc3 = w[3];
2022     wd1r = 1;
2023     wd1i = 0;
2024     wd3r = 1;
2025     wd3i = 0;
2026     k = 0;
2027     for (j = 2; j < mh - 2; j += 4) {
2028         k += 4;
2029         wk1r = csc1 * (wd1r + w[k]);
2030         wk1i = csc1 * (wd1i + w[k + 1]);
2031         wk3r = csc3 * (wd3r + w[k + 2]);
2032         wk3i = csc3 * (wd3i + w[k + 3]);
2033         wd1r = w[k];
2034         wd1i = w[k + 1];
2035         wd3r = w[k + 2];
2036         wd3i = w[k + 3];
2037         j1 = j + m;
2038         j2 = j1 + m;
2039         j3 = j2 + m;
2040         x0r = a[j] + a[j2];
2041         x0i = -a[j + 1] - a[j2 + 1];
2042         x1r = a[j] - a[j2];
2043         x1i = -a[j + 1] + a[j2 + 1];
2044         y0r = a[j + 2] + a[j2 + 2];
2045         y0i = -a[j + 3] - a[j2 + 3];
2046         y1r = a[j + 2] - a[j2 + 2];
2047         y1i = -a[j + 3] + a[j2 + 3];
2048         x2r = a[j1] + a[j3];
2049         x2i = a[j1 + 1] + a[j3 + 1];
2050         x3r = a[j1] - a[j3];
2051         x3i = a[j1 + 1] - a[j3 + 1];
2052         y2r = a[j1 + 2] + a[j3 + 2];
2053         y2i = a[j1 + 3] + a[j3 + 3];
2054         y3r = a[j1 + 2] - a[j3 + 2];
2055         y3i = a[j1 + 3] - a[j3 + 3];
2056         a[j] = x0r + x2r;
2057         a[j + 1] = x0i - x2i;
2058         a[j + 2] = y0r + y2r;
2059         a[j + 3] = y0i - y2i;
2060         a[j1] = x0r - x2r;
2061         a[j1 + 1] = x0i + x2i;
2062         a[j1 + 2] = y0r - y2r;
2063         a[j1 + 3] = y0i + y2i;
2064         x0r = x1r + x3i;
2065         x0i = x1i + x3r;
2066         a[j2] = wk1r * x0r - wk1i * x0i;
2067         a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2068         x0r = y1r + y3i;
2069         x0i = y1i + y3r;
2070         a[j2 + 2] = wd1r * x0r - wd1i * x0i;
2071         a[j2 + 3] = wd1r * x0i + wd1i * x0r;
2072         x0r = x1r - x3i;
2073         x0i = x1i - x3r;
2074         a[j3] = wk3r * x0r + wk3i * x0i;
2075         a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2076         x0r = y1r - y3i;
2077         x0i = y1i - y3r;
2078         a[j3 + 2] = wd3r * x0r + wd3i * x0i;
2079         a[j3 + 3] = wd3r * x0i - wd3i * x0r;
2080         j0 = m - j;
2081         j1 = j0 + m;
2082         j2 = j1 + m;
2083         j3 = j2 + m;
2084         x0r = a[j0] + a[j2];
2085         x0i = -a[j0 + 1] - a[j2 + 1];
2086         x1r = a[j0] - a[j2];
2087         x1i = -a[j0 + 1] + a[j2 + 1];
2088         y0r = a[j0 - 2] + a[j2 - 2];
2089         y0i = -a[j0 - 1] - a[j2 - 1];
2090         y1r = a[j0 - 2] - a[j2 - 2];
2091         y1i = -a[j0 - 1] + a[j2 - 1];
2092         x2r = a[j1] + a[j3];
2093         x2i = a[j1 + 1] + a[j3 + 1];
2094         x3r = a[j1] - a[j3];
2095         x3i = a[j1 + 1] - a[j3 + 1];
2096         y2r = a[j1 - 2] + a[j3 - 2];
2097         y2i = a[j1 - 1] + a[j3 - 1];
2098         y3r = a[j1 - 2] - a[j3 - 2];
2099         y3i = a[j1 - 1] - a[j3 - 1];
2100         a[j0] = x0r + x2r;
2101         a[j0 + 1] = x0i - x2i;
2102         a[j0 - 2] = y0r + y2r;
2103         a[j0 - 1] = y0i - y2i;
2104         a[j1] = x0r - x2r;
2105         a[j1 + 1] = x0i + x2i;
2106         a[j1 - 2] = y0r - y2r;
2107         a[j1 - 1] = y0i + y2i;
2108         x0r = x1r + x3i;
2109         x0i = x1i + x3r;
2110         a[j2] = wk1i * x0r - wk1r * x0i;
2111         a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2112         x0r = y1r + y3i;
2113         x0i = y1i + y3r;
2114         a[j2 - 2] = wd1i * x0r - wd1r * x0i;
2115         a[j2 - 1] = wd1i * x0i + wd1r * x0r;
2116         x0r = x1r - x3i;
2117         x0i = x1i - x3r;
2118         a[j3] = wk3i * x0r + wk3r * x0i;
2119         a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2120         x0r = y1r - y3i;
2121         x0i = y1i - y3r;
2122         a[j3 - 2] = wd3i * x0r + wd3r * x0i;
2123         a[j3 - 1] = wd3i * x0i - wd3r * x0r;
2124     }
2125     wk1r = csc1 * (wd1r + wn4r);
2126     wk1i = csc1 * (wd1i + wn4r);
2127     wk3r = csc3 * (wd3r - wn4r);
2128     wk3i = csc3 * (wd3i - wn4r);
2129     j0 = mh;
2130     j1 = j0 + m;
2131     j2 = j1 + m;
2132     j3 = j2 + m;
2133     x0r = a[j0 - 2] + a[j2 - 2];
2134     x0i = -a[j0 - 1] - a[j2 - 1];
2135     x1r = a[j0 - 2] - a[j2 - 2];
2136     x1i = -a[j0 - 1] + a[j2 - 1];
2137     x2r = a[j1 - 2] + a[j3 - 2];
2138     x2i = a[j1 - 1] + a[j3 - 1];
2139     x3r = a[j1 - 2] - a[j3 - 2];
2140     x3i = a[j1 - 1] - a[j3 - 1];
2141     a[j0 - 2] = x0r + x2r;
2142     a[j0 - 1] = x0i - x2i;
2143     a[j1 - 2] = x0r - x2r;
2144     a[j1 - 1] = x0i + x2i;
2145     x0r = x1r + x3i;
2146     x0i = x1i + x3r;
2147     a[j2 - 2] = wk1r * x0r - wk1i * x0i;
2148     a[j2 - 1] = wk1r * x0i + wk1i * x0r;
2149     x0r = x1r - x3i;
2150     x0i = x1i - x3r;
2151     a[j3 - 2] = wk3r * x0r + wk3i * x0i;
2152     a[j3 - 1] = wk3r * x0i - wk3i * x0r;
2153     x0r = a[j0] + a[j2];
2154     x0i = -a[j0 + 1] - a[j2 + 1];
2155     x1r = a[j0] - a[j2];
2156     x1i = -a[j0 + 1] + a[j2 + 1];
2157     x2r = a[j1] + a[j3];
2158     x2i = a[j1 + 1] + a[j3 + 1];
2159     x3r = a[j1] - a[j3];
2160     x3i = a[j1 + 1] - a[j3 + 1];
2161     a[j0] = x0r + x2r;
2162     a[j0 + 1] = x0i - x2i;
2163     a[j1] = x0r - x2r;
2164     a[j1 + 1] = x0i + x2i;
2165     x0r = x1r + x3i;
2166     x0i = x1i + x3r;
2167     a[j2] = wn4r * (x0r - x0i);
2168     a[j2 + 1] = wn4r * (x0i + x0r);
2169     x0r = x1r - x3i;
2170     x0i = x1i - x3r;
2171     a[j3] = -wn4r * (x0r + x0i);
2172     a[j3 + 1] = -wn4r * (x0i - x0r);
2173     x0r = a[j0 + 2] + a[j2 + 2];
2174     x0i = -a[j0 + 3] - a[j2 + 3];
2175     x1r = a[j0 + 2] - a[j2 + 2];
2176     x1i = -a[j0 + 3] + a[j2 + 3];
2177     x2r = a[j1 + 2] + a[j3 + 2];
2178     x2i = a[j1 + 3] + a[j3 + 3];
2179     x3r = a[j1 + 2] - a[j3 + 2];
2180     x3i = a[j1 + 3] - a[j3 + 3];
2181     a[j0 + 2] = x0r + x2r;
2182     a[j0 + 3] = x0i - x2i;
2183     a[j1 + 2] = x0r - x2r;
2184     a[j1 + 3] = x0i + x2i;
2185     x0r = x1r + x3i;
2186     x0i = x1i + x3r;
2187     a[j2 + 2] = wk1i * x0r - wk1r * x0i;
2188     a[j2 + 3] = wk1i * x0i + wk1r * x0r;
2189     x0r = x1r - x3i;
2190     x0i = x1i - x3r;
2191     a[j3 + 2] = wk3i * x0r + wk3r * x0i;
2192     a[j3 + 3] = wk3i * x0i - wk3r * x0r;
2193 }
2194 
2195 
2196 #ifdef USE_CDFT_THREADS
2197 struct cdft_arg_st {
2198     int n0;
2199     int n;
2200     double *a;
2201     int nw;
2202     double *w;
2203 };
2204 typedef struct cdft_arg_st cdft_arg_t;
2205 
2206 
cftrec4_th(int n,double * a,int nw,double * w)2207 void cftrec4_th(int n, double *a, int nw, double *w)
2208 {
2209     void *cftrec1_th(void *p);
2210     void *cftrec2_th(void *p);
2211     int i, idiv4, m, nthread;
2212     cdft_thread_t th[4];
2213     cdft_arg_t ag[4];
2214 
2215     nthread = 2;
2216     idiv4 = 0;
2217     m = n >> 1;
2218     if (n > CDFT_4THREADS_BEGIN_N) {
2219         nthread = 4;
2220         idiv4 = 1;
2221         m >>= 1;
2222     }
2223     for (i = 0; i < nthread; i++) {
2224         ag[i].n0 = n;
2225         ag[i].n = m;
2226         ag[i].a = &a[i * m];
2227         ag[i].nw = nw;
2228         ag[i].w = w;
2229         if (i != idiv4) {
2230             cdft_thread_create(&th[i], cftrec1_th, &ag[i]);
2231         } else {
2232             cdft_thread_create(&th[i], cftrec2_th, &ag[i]);
2233         }
2234     }
2235     for (i = 0; i < nthread; i++) {
2236         cdft_thread_wait(th[i]);
2237     }
2238 }
2239 
2240 
cftrec1_th(void * p)2241 void *cftrec1_th(void *p)
2242 {
2243     int cfttree(int n, int j, int k, double *a, int nw, double *w);
2244     void cftleaf(int n, int isplt, double *a, int nw, double *w);
2245     void cftmdl1(int n, double *a, double *w);
2246     int isplt, j, k, m, n, n0, nw;
2247     double *a, *w;
2248 
2249     n0 = ((cdft_arg_t *) p)->n0;
2250     n = ((cdft_arg_t *) p)->n;
2251     a = ((cdft_arg_t *) p)->a;
2252     nw = ((cdft_arg_t *) p)->nw;
2253     w = ((cdft_arg_t *) p)->w;
2254     m = n0;
2255     while (m > 512) {
2256         m >>= 2;
2257         cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2258     }
2259     cftleaf(m, 1, &a[n - m], nw, w);
2260     k = 0;
2261     for (j = n - m; j > 0; j -= m) {
2262         k++;
2263         isplt = cfttree(m, j, k, a, nw, w);
2264         cftleaf(m, isplt, &a[j - m], nw, w);
2265     }
2266     return (void *) 0;
2267 }
2268 
2269 
cftrec2_th(void * p)2270 void *cftrec2_th(void *p)
2271 {
2272     int cfttree(int n, int j, int k, double *a, int nw, double *w);
2273     void cftleaf(int n, int isplt, double *a, int nw, double *w);
2274     void cftmdl2(int n, double *a, double *w);
2275     int isplt, j, k, m, n, n0, nw;
2276     double *a, *w;
2277 
2278     n0 = ((cdft_arg_t *) p)->n0;
2279     n = ((cdft_arg_t *) p)->n;
2280     a = ((cdft_arg_t *) p)->a;
2281     nw = ((cdft_arg_t *) p)->nw;
2282     w = ((cdft_arg_t *) p)->w;
2283     k = 1;
2284     m = n0;
2285     while (m > 512) {
2286         m >>= 2;
2287         k <<= 2;
2288         cftmdl2(m, &a[n - m], &w[nw - m]);
2289     }
2290     cftleaf(m, 0, &a[n - m], nw, w);
2291     k >>= 1;
2292     for (j = n - m; j > 0; j -= m) {
2293         k++;
2294         isplt = cfttree(m, j, k, a, nw, w);
2295         cftleaf(m, isplt, &a[j - m], nw, w);
2296     }
2297     return (void *) 0;
2298 }
2299 #endif /* USE_CDFT_THREADS */
2300 
2301 
cftrec4(int n,double * a,int nw,double * w)2302 void cftrec4(int n, double *a, int nw, double *w)
2303 {
2304     int cfttree(int n, int j, int k, double *a, int nw, double *w);
2305     void cftleaf(int n, int isplt, double *a, int nw, double *w);
2306     void cftmdl1(int n, double *a, double *w);
2307     int isplt, j, k, m;
2308 
2309     m = n;
2310     while (m > 512) {
2311         m >>= 2;
2312         cftmdl1(m, &a[n - m], &w[nw - (m >> 1)]);
2313     }
2314     cftleaf(m, 1, &a[n - m], nw, w);
2315     k = 0;
2316     for (j = n - m; j > 0; j -= m) {
2317         k++;
2318         isplt = cfttree(m, j, k, a, nw, w);
2319         cftleaf(m, isplt, &a[j - m], nw, w);
2320     }
2321 }
2322 
2323 
cfttree(int n,int j,int k,double * a,int nw,double * w)2324 int cfttree(int n, int j, int k, double *a, int nw, double *w)
2325 {
2326     void cftmdl1(int n, double *a, double *w);
2327     void cftmdl2(int n, double *a, double *w);
2328     int i, isplt, m;
2329 
2330     if ((k & 3) != 0) {
2331         isplt = k & 1;
2332         if (isplt != 0) {
2333             cftmdl1(n, &a[j - n], &w[nw - (n >> 1)]);
2334         } else {
2335             cftmdl2(n, &a[j - n], &w[nw - n]);
2336         }
2337     } else {
2338         m = n;
2339         for (i = k; (i & 3) == 0; i >>= 2) {
2340             m <<= 2;
2341         }
2342         isplt = i & 1;
2343         if (isplt != 0) {
2344             while (m > 128) {
2345                 cftmdl1(m, &a[j - m], &w[nw - (m >> 1)]);
2346                 m >>= 2;
2347             }
2348         } else {
2349             while (m > 128) {
2350                 cftmdl2(m, &a[j - m], &w[nw - m]);
2351                 m >>= 2;
2352             }
2353         }
2354     }
2355     return isplt;
2356 }
2357 
2358 
cftleaf(int n,int isplt,double * a,int nw,double * w)2359 void cftleaf(int n, int isplt, double *a, int nw, double *w)
2360 {
2361     void cftmdl1(int n, double *a, double *w);
2362     void cftmdl2(int n, double *a, double *w);
2363     void cftf161(double *a, double *w);
2364     void cftf162(double *a, double *w);
2365     void cftf081(double *a, double *w);
2366     void cftf082(double *a, double *w);
2367 
2368     if (n == 512) {
2369         cftmdl1(128, a, &w[nw - 64]);
2370         cftf161(a, &w[nw - 8]);
2371         cftf162(&a[32], &w[nw - 32]);
2372         cftf161(&a[64], &w[nw - 8]);
2373         cftf161(&a[96], &w[nw - 8]);
2374         cftmdl2(128, &a[128], &w[nw - 128]);
2375         cftf161(&a[128], &w[nw - 8]);
2376         cftf162(&a[160], &w[nw - 32]);
2377         cftf161(&a[192], &w[nw - 8]);
2378         cftf162(&a[224], &w[nw - 32]);
2379         cftmdl1(128, &a[256], &w[nw - 64]);
2380         cftf161(&a[256], &w[nw - 8]);
2381         cftf162(&a[288], &w[nw - 32]);
2382         cftf161(&a[320], &w[nw - 8]);
2383         cftf161(&a[352], &w[nw - 8]);
2384         if (isplt != 0) {
2385             cftmdl1(128, &a[384], &w[nw - 64]);
2386             cftf161(&a[480], &w[nw - 8]);
2387         } else {
2388             cftmdl2(128, &a[384], &w[nw - 128]);
2389             cftf162(&a[480], &w[nw - 32]);
2390         }
2391         cftf161(&a[384], &w[nw - 8]);
2392         cftf162(&a[416], &w[nw - 32]);
2393         cftf161(&a[448], &w[nw - 8]);
2394     } else {
2395         cftmdl1(64, a, &w[nw - 32]);
2396         cftf081(a, &w[nw - 8]);
2397         cftf082(&a[16], &w[nw - 8]);
2398         cftf081(&a[32], &w[nw - 8]);
2399         cftf081(&a[48], &w[nw - 8]);
2400         cftmdl2(64, &a[64], &w[nw - 64]);
2401         cftf081(&a[64], &w[nw - 8]);
2402         cftf082(&a[80], &w[nw - 8]);
2403         cftf081(&a[96], &w[nw - 8]);
2404         cftf082(&a[112], &w[nw - 8]);
2405         cftmdl1(64, &a[128], &w[nw - 32]);
2406         cftf081(&a[128], &w[nw - 8]);
2407         cftf082(&a[144], &w[nw - 8]);
2408         cftf081(&a[160], &w[nw - 8]);
2409         cftf081(&a[176], &w[nw - 8]);
2410         if (isplt != 0) {
2411             cftmdl1(64, &a[192], &w[nw - 32]);
2412             cftf081(&a[240], &w[nw - 8]);
2413         } else {
2414             cftmdl2(64, &a[192], &w[nw - 64]);
2415             cftf082(&a[240], &w[nw - 8]);
2416         }
2417         cftf081(&a[192], &w[nw - 8]);
2418         cftf082(&a[208], &w[nw - 8]);
2419         cftf081(&a[224], &w[nw - 8]);
2420     }
2421 }
2422 
2423 
cftmdl1(int n,double * a,double * w)2424 void cftmdl1(int n, double *a, double *w)
2425 {
2426     int j, j0, j1, j2, j3, k, m, mh;
2427     double wn4r, wk1r, wk1i, wk3r, wk3i;
2428     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
2429 
2430     mh = n >> 3;
2431     m = 2 * mh;
2432     j1 = m;
2433     j2 = j1 + m;
2434     j3 = j2 + m;
2435     x0r = a[0] + a[j2];
2436     x0i = a[1] + a[j2 + 1];
2437     x1r = a[0] - a[j2];
2438     x1i = a[1] - a[j2 + 1];
2439     x2r = a[j1] + a[j3];
2440     x2i = a[j1 + 1] + a[j3 + 1];
2441     x3r = a[j1] - a[j3];
2442     x3i = a[j1 + 1] - a[j3 + 1];
2443     a[0] = x0r + x2r;
2444     a[1] = x0i + x2i;
2445     a[j1] = x0r - x2r;
2446     a[j1 + 1] = x0i - x2i;
2447     a[j2] = x1r - x3i;
2448     a[j2 + 1] = x1i + x3r;
2449     a[j3] = x1r + x3i;
2450     a[j3 + 1] = x1i - x3r;
2451     wn4r = w[1];
2452     k = 0;
2453     for (j = 2; j < mh; j += 2) {
2454         k += 4;
2455         wk1r = w[k];
2456         wk1i = w[k + 1];
2457         wk3r = w[k + 2];
2458         wk3i = w[k + 3];
2459         j1 = j + m;
2460         j2 = j1 + m;
2461         j3 = j2 + m;
2462         x0r = a[j] + a[j2];
2463         x0i = a[j + 1] + a[j2 + 1];
2464         x1r = a[j] - a[j2];
2465         x1i = a[j + 1] - a[j2 + 1];
2466         x2r = a[j1] + a[j3];
2467         x2i = a[j1 + 1] + a[j3 + 1];
2468         x3r = a[j1] - a[j3];
2469         x3i = a[j1 + 1] - a[j3 + 1];
2470         a[j] = x0r + x2r;
2471         a[j + 1] = x0i + x2i;
2472         a[j1] = x0r - x2r;
2473         a[j1 + 1] = x0i - x2i;
2474         x0r = x1r - x3i;
2475         x0i = x1i + x3r;
2476         a[j2] = wk1r * x0r - wk1i * x0i;
2477         a[j2 + 1] = wk1r * x0i + wk1i * x0r;
2478         x0r = x1r + x3i;
2479         x0i = x1i - x3r;
2480         a[j3] = wk3r * x0r + wk3i * x0i;
2481         a[j3 + 1] = wk3r * x0i - wk3i * x0r;
2482         j0 = m - j;
2483         j1 = j0 + m;
2484         j2 = j1 + m;
2485         j3 = j2 + m;
2486         x0r = a[j0] + a[j2];
2487         x0i = a[j0 + 1] + a[j2 + 1];
2488         x1r = a[j0] - a[j2];
2489         x1i = a[j0 + 1] - a[j2 + 1];
2490         x2r = a[j1] + a[j3];
2491         x2i = a[j1 + 1] + a[j3 + 1];
2492         x3r = a[j1] - a[j3];
2493         x3i = a[j1 + 1] - a[j3 + 1];
2494         a[j0] = x0r + x2r;
2495         a[j0 + 1] = x0i + x2i;
2496         a[j1] = x0r - x2r;
2497         a[j1 + 1] = x0i - x2i;
2498         x0r = x1r - x3i;
2499         x0i = x1i + x3r;
2500         a[j2] = wk1i * x0r - wk1r * x0i;
2501         a[j2 + 1] = wk1i * x0i + wk1r * x0r;
2502         x0r = x1r + x3i;
2503         x0i = x1i - x3r;
2504         a[j3] = wk3i * x0r + wk3r * x0i;
2505         a[j3 + 1] = wk3i * x0i - wk3r * x0r;
2506     }
2507     j0 = mh;
2508     j1 = j0 + m;
2509     j2 = j1 + m;
2510     j3 = j2 + m;
2511     x0r = a[j0] + a[j2];
2512     x0i = a[j0 + 1] + a[j2 + 1];
2513     x1r = a[j0] - a[j2];
2514     x1i = a[j0 + 1] - a[j2 + 1];
2515     x2r = a[j1] + a[j3];
2516     x2i = a[j1 + 1] + a[j3 + 1];
2517     x3r = a[j1] - a[j3];
2518     x3i = a[j1 + 1] - a[j3 + 1];
2519     a[j0] = x0r + x2r;
2520     a[j0 + 1] = x0i + x2i;
2521     a[j1] = x0r - x2r;
2522     a[j1 + 1] = x0i - x2i;
2523     x0r = x1r - x3i;
2524     x0i = x1i + x3r;
2525     a[j2] = wn4r * (x0r - x0i);
2526     a[j2 + 1] = wn4r * (x0i + x0r);
2527     x0r = x1r + x3i;
2528     x0i = x1i - x3r;
2529     a[j3] = -wn4r * (x0r + x0i);
2530     a[j3 + 1] = -wn4r * (x0i - x0r);
2531 }
2532 
2533 
cftmdl2(int n,double * a,double * w)2534 void cftmdl2(int n, double *a, double *w)
2535 {
2536     int j, j0, j1, j2, j3, k, kr, m, mh;
2537     double wn4r, wk1r, wk1i, wk3r, wk3i, wd1r, wd1i, wd3r, wd3i;
2538     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, y0r, y0i, y2r, y2i;
2539 
2540     mh = n >> 3;
2541     m = 2 * mh;
2542     wn4r = w[1];
2543     j1 = m;
2544     j2 = j1 + m;
2545     j3 = j2 + m;
2546     x0r = a[0] - a[j2 + 1];
2547     x0i = a[1] + a[j2];
2548     x1r = a[0] + a[j2 + 1];
2549     x1i = a[1] - a[j2];
2550     x2r = a[j1] - a[j3 + 1];
2551     x2i = a[j1 + 1] + a[j3];
2552     x3r = a[j1] + a[j3 + 1];
2553     x3i = a[j1 + 1] - a[j3];
2554     y0r = wn4r * (x2r - x2i);
2555     y0i = wn4r * (x2i + x2r);
2556     a[0] = x0r + y0r;
2557     a[1] = x0i + y0i;
2558     a[j1] = x0r - y0r;
2559     a[j1 + 1] = x0i - y0i;
2560     y0r = wn4r * (x3r - x3i);
2561     y0i = wn4r * (x3i + x3r);
2562     a[j2] = x1r - y0i;
2563     a[j2 + 1] = x1i + y0r;
2564     a[j3] = x1r + y0i;
2565     a[j3 + 1] = x1i - y0r;
2566     k = 0;
2567     kr = 2 * m;
2568     for (j = 2; j < mh; j += 2) {
2569         k += 4;
2570         wk1r = w[k];
2571         wk1i = w[k + 1];
2572         wk3r = w[k + 2];
2573         wk3i = w[k + 3];
2574         kr -= 4;
2575         wd1i = w[kr];
2576         wd1r = w[kr + 1];
2577         wd3i = w[kr + 2];
2578         wd3r = w[kr + 3];
2579         j1 = j + m;
2580         j2 = j1 + m;
2581         j3 = j2 + m;
2582         x0r = a[j] - a[j2 + 1];
2583         x0i = a[j + 1] + a[j2];
2584         x1r = a[j] + a[j2 + 1];
2585         x1i = a[j + 1] - a[j2];
2586         x2r = a[j1] - a[j3 + 1];
2587         x2i = a[j1 + 1] + a[j3];
2588         x3r = a[j1] + a[j3 + 1];
2589         x3i = a[j1 + 1] - a[j3];
2590         y0r = wk1r * x0r - wk1i * x0i;
2591         y0i = wk1r * x0i + wk1i * x0r;
2592         y2r = wd1r * x2r - wd1i * x2i;
2593         y2i = wd1r * x2i + wd1i * x2r;
2594         a[j] = y0r + y2r;
2595         a[j + 1] = y0i + y2i;
2596         a[j1] = y0r - y2r;
2597         a[j1 + 1] = y0i - y2i;
2598         y0r = wk3r * x1r + wk3i * x1i;
2599         y0i = wk3r * x1i - wk3i * x1r;
2600         y2r = wd3r * x3r + wd3i * x3i;
2601         y2i = wd3r * x3i - wd3i * x3r;
2602         a[j2] = y0r + y2r;
2603         a[j2 + 1] = y0i + y2i;
2604         a[j3] = y0r - y2r;
2605         a[j3 + 1] = y0i - y2i;
2606         j0 = m - j;
2607         j1 = j0 + m;
2608         j2 = j1 + m;
2609         j3 = j2 + m;
2610         x0r = a[j0] - a[j2 + 1];
2611         x0i = a[j0 + 1] + a[j2];
2612         x1r = a[j0] + a[j2 + 1];
2613         x1i = a[j0 + 1] - a[j2];
2614         x2r = a[j1] - a[j3 + 1];
2615         x2i = a[j1 + 1] + a[j3];
2616         x3r = a[j1] + a[j3 + 1];
2617         x3i = a[j1 + 1] - a[j3];
2618         y0r = wd1i * x0r - wd1r * x0i;
2619         y0i = wd1i * x0i + wd1r * x0r;
2620         y2r = wk1i * x2r - wk1r * x2i;
2621         y2i = wk1i * x2i + wk1r * x2r;
2622         a[j0] = y0r + y2r;
2623         a[j0 + 1] = y0i + y2i;
2624         a[j1] = y0r - y2r;
2625         a[j1 + 1] = y0i - y2i;
2626         y0r = wd3i * x1r + wd3r * x1i;
2627         y0i = wd3i * x1i - wd3r * x1r;
2628         y2r = wk3i * x3r + wk3r * x3i;
2629         y2i = wk3i * x3i - wk3r * x3r;
2630         a[j2] = y0r + y2r;
2631         a[j2 + 1] = y0i + y2i;
2632         a[j3] = y0r - y2r;
2633         a[j3 + 1] = y0i - y2i;
2634     }
2635     wk1r = w[m];
2636     wk1i = w[m + 1];
2637     j0 = mh;
2638     j1 = j0 + m;
2639     j2 = j1 + m;
2640     j3 = j2 + m;
2641     x0r = a[j0] - a[j2 + 1];
2642     x0i = a[j0 + 1] + a[j2];
2643     x1r = a[j0] + a[j2 + 1];
2644     x1i = a[j0 + 1] - a[j2];
2645     x2r = a[j1] - a[j3 + 1];
2646     x2i = a[j1 + 1] + a[j3];
2647     x3r = a[j1] + a[j3 + 1];
2648     x3i = a[j1 + 1] - a[j3];
2649     y0r = wk1r * x0r - wk1i * x0i;
2650     y0i = wk1r * x0i + wk1i * x0r;
2651     y2r = wk1i * x2r - wk1r * x2i;
2652     y2i = wk1i * x2i + wk1r * x2r;
2653     a[j0] = y0r + y2r;
2654     a[j0 + 1] = y0i + y2i;
2655     a[j1] = y0r - y2r;
2656     a[j1 + 1] = y0i - y2i;
2657     y0r = wk1i * x1r - wk1r * x1i;
2658     y0i = wk1i * x1i + wk1r * x1r;
2659     y2r = wk1r * x3r - wk1i * x3i;
2660     y2i = wk1r * x3i + wk1i * x3r;
2661     a[j2] = y0r - y2r;
2662     a[j2 + 1] = y0i - y2i;
2663     a[j3] = y0r + y2r;
2664     a[j3 + 1] = y0i + y2i;
2665 }
2666 
2667 
cftfx41(int n,double * a,int nw,double * w)2668 void cftfx41(int n, double *a, int nw, double *w)
2669 {
2670     void cftf161(double *a, double *w);
2671     void cftf162(double *a, double *w);
2672     void cftf081(double *a, double *w);
2673     void cftf082(double *a, double *w);
2674 
2675     if (n == 128) {
2676         cftf161(a, &w[nw - 8]);
2677         cftf162(&a[32], &w[nw - 32]);
2678         cftf161(&a[64], &w[nw - 8]);
2679         cftf161(&a[96], &w[nw - 8]);
2680     } else {
2681         cftf081(a, &w[nw - 8]);
2682         cftf082(&a[16], &w[nw - 8]);
2683         cftf081(&a[32], &w[nw - 8]);
2684         cftf081(&a[48], &w[nw - 8]);
2685     }
2686 }
2687 
2688 
cftf161(double * a,double * w)2689 void cftf161(double *a, double *w)
2690 {
2691     double wn4r, wk1r, wk1i,
2692         x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
2693         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2694         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2695         y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2696         y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2697 
2698     wn4r = w[1];
2699     wk1r = w[2];
2700     wk1i = w[3];
2701     x0r = a[0] + a[16];
2702     x0i = a[1] + a[17];
2703     x1r = a[0] - a[16];
2704     x1i = a[1] - a[17];
2705     x2r = a[8] + a[24];
2706     x2i = a[9] + a[25];
2707     x3r = a[8] - a[24];
2708     x3i = a[9] - a[25];
2709     y0r = x0r + x2r;
2710     y0i = x0i + x2i;
2711     y4r = x0r - x2r;
2712     y4i = x0i - x2i;
2713     y8r = x1r - x3i;
2714     y8i = x1i + x3r;
2715     y12r = x1r + x3i;
2716     y12i = x1i - x3r;
2717     x0r = a[2] + a[18];
2718     x0i = a[3] + a[19];
2719     x1r = a[2] - a[18];
2720     x1i = a[3] - a[19];
2721     x2r = a[10] + a[26];
2722     x2i = a[11] + a[27];
2723     x3r = a[10] - a[26];
2724     x3i = a[11] - a[27];
2725     y1r = x0r + x2r;
2726     y1i = x0i + x2i;
2727     y5r = x0r - x2r;
2728     y5i = x0i - x2i;
2729     x0r = x1r - x3i;
2730     x0i = x1i + x3r;
2731     y9r = wk1r * x0r - wk1i * x0i;
2732     y9i = wk1r * x0i + wk1i * x0r;
2733     x0r = x1r + x3i;
2734     x0i = x1i - x3r;
2735     y13r = wk1i * x0r - wk1r * x0i;
2736     y13i = wk1i * x0i + wk1r * x0r;
2737     x0r = a[4] + a[20];
2738     x0i = a[5] + a[21];
2739     x1r = a[4] - a[20];
2740     x1i = a[5] - a[21];
2741     x2r = a[12] + a[28];
2742     x2i = a[13] + a[29];
2743     x3r = a[12] - a[28];
2744     x3i = a[13] - a[29];
2745     y2r = x0r + x2r;
2746     y2i = x0i + x2i;
2747     y6r = x0r - x2r;
2748     y6i = x0i - x2i;
2749     x0r = x1r - x3i;
2750     x0i = x1i + x3r;
2751     y10r = wn4r * (x0r - x0i);
2752     y10i = wn4r * (x0i + x0r);
2753     x0r = x1r + x3i;
2754     x0i = x1i - x3r;
2755     y14r = wn4r * (x0r + x0i);
2756     y14i = wn4r * (x0i - x0r);
2757     x0r = a[6] + a[22];
2758     x0i = a[7] + a[23];
2759     x1r = a[6] - a[22];
2760     x1i = a[7] - a[23];
2761     x2r = a[14] + a[30];
2762     x2i = a[15] + a[31];
2763     x3r = a[14] - a[30];
2764     x3i = a[15] - a[31];
2765     y3r = x0r + x2r;
2766     y3i = x0i + x2i;
2767     y7r = x0r - x2r;
2768     y7i = x0i - x2i;
2769     x0r = x1r - x3i;
2770     x0i = x1i + x3r;
2771     y11r = wk1i * x0r - wk1r * x0i;
2772     y11i = wk1i * x0i + wk1r * x0r;
2773     x0r = x1r + x3i;
2774     x0i = x1i - x3r;
2775     y15r = wk1r * x0r - wk1i * x0i;
2776     y15i = wk1r * x0i + wk1i * x0r;
2777     x0r = y12r - y14r;
2778     x0i = y12i - y14i;
2779     x1r = y12r + y14r;
2780     x1i = y12i + y14i;
2781     x2r = y13r - y15r;
2782     x2i = y13i - y15i;
2783     x3r = y13r + y15r;
2784     x3i = y13i + y15i;
2785     a[24] = x0r + x2r;
2786     a[25] = x0i + x2i;
2787     a[26] = x0r - x2r;
2788     a[27] = x0i - x2i;
2789     a[28] = x1r - x3i;
2790     a[29] = x1i + x3r;
2791     a[30] = x1r + x3i;
2792     a[31] = x1i - x3r;
2793     x0r = y8r + y10r;
2794     x0i = y8i + y10i;
2795     x1r = y8r - y10r;
2796     x1i = y8i - y10i;
2797     x2r = y9r + y11r;
2798     x2i = y9i + y11i;
2799     x3r = y9r - y11r;
2800     x3i = y9i - y11i;
2801     a[16] = x0r + x2r;
2802     a[17] = x0i + x2i;
2803     a[18] = x0r - x2r;
2804     a[19] = x0i - x2i;
2805     a[20] = x1r - x3i;
2806     a[21] = x1i + x3r;
2807     a[22] = x1r + x3i;
2808     a[23] = x1i - x3r;
2809     x0r = y5r - y7i;
2810     x0i = y5i + y7r;
2811     x2r = wn4r * (x0r - x0i);
2812     x2i = wn4r * (x0i + x0r);
2813     x0r = y5r + y7i;
2814     x0i = y5i - y7r;
2815     x3r = wn4r * (x0r - x0i);
2816     x3i = wn4r * (x0i + x0r);
2817     x0r = y4r - y6i;
2818     x0i = y4i + y6r;
2819     x1r = y4r + y6i;
2820     x1i = y4i - y6r;
2821     a[8] = x0r + x2r;
2822     a[9] = x0i + x2i;
2823     a[10] = x0r - x2r;
2824     a[11] = x0i - x2i;
2825     a[12] = x1r - x3i;
2826     a[13] = x1i + x3r;
2827     a[14] = x1r + x3i;
2828     a[15] = x1i - x3r;
2829     x0r = y0r + y2r;
2830     x0i = y0i + y2i;
2831     x1r = y0r - y2r;
2832     x1i = y0i - y2i;
2833     x2r = y1r + y3r;
2834     x2i = y1i + y3i;
2835     x3r = y1r - y3r;
2836     x3i = y1i - y3i;
2837     a[0] = x0r + x2r;
2838     a[1] = x0i + x2i;
2839     a[2] = x0r - x2r;
2840     a[3] = x0i - x2i;
2841     a[4] = x1r - x3i;
2842     a[5] = x1i + x3r;
2843     a[6] = x1r + x3i;
2844     a[7] = x1i - x3r;
2845 }
2846 
2847 
cftf162(double * a,double * w)2848 void cftf162(double *a, double *w)
2849 {
2850     double wn4r, wk1r, wk1i, wk2r, wk2i, wk3r, wk3i,
2851         x0r, x0i, x1r, x1i, x2r, x2i,
2852         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
2853         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i,
2854         y8r, y8i, y9r, y9i, y10r, y10i, y11r, y11i,
2855         y12r, y12i, y13r, y13i, y14r, y14i, y15r, y15i;
2856 
2857     wn4r = w[1];
2858     wk1r = w[4];
2859     wk1i = w[5];
2860     wk3r = w[6];
2861     wk3i = -w[7];
2862     wk2r = w[8];
2863     wk2i = w[9];
2864     x1r = a[0] - a[17];
2865     x1i = a[1] + a[16];
2866     x0r = a[8] - a[25];
2867     x0i = a[9] + a[24];
2868     x2r = wn4r * (x0r - x0i);
2869     x2i = wn4r * (x0i + x0r);
2870     y0r = x1r + x2r;
2871     y0i = x1i + x2i;
2872     y4r = x1r - x2r;
2873     y4i = x1i - x2i;
2874     x1r = a[0] + a[17];
2875     x1i = a[1] - a[16];
2876     x0r = a[8] + a[25];
2877     x0i = a[9] - a[24];
2878     x2r = wn4r * (x0r - x0i);
2879     x2i = wn4r * (x0i + x0r);
2880     y8r = x1r - x2i;
2881     y8i = x1i + x2r;
2882     y12r = x1r + x2i;
2883     y12i = x1i - x2r;
2884     x0r = a[2] - a[19];
2885     x0i = a[3] + a[18];
2886     x1r = wk1r * x0r - wk1i * x0i;
2887     x1i = wk1r * x0i + wk1i * x0r;
2888     x0r = a[10] - a[27];
2889     x0i = a[11] + a[26];
2890     x2r = wk3i * x0r - wk3r * x0i;
2891     x2i = wk3i * x0i + wk3r * x0r;
2892     y1r = x1r + x2r;
2893     y1i = x1i + x2i;
2894     y5r = x1r - x2r;
2895     y5i = x1i - x2i;
2896     x0r = a[2] + a[19];
2897     x0i = a[3] - a[18];
2898     x1r = wk3r * x0r - wk3i * x0i;
2899     x1i = wk3r * x0i + wk3i * x0r;
2900     x0r = a[10] + a[27];
2901     x0i = a[11] - a[26];
2902     x2r = wk1r * x0r + wk1i * x0i;
2903     x2i = wk1r * x0i - wk1i * x0r;
2904     y9r = x1r - x2r;
2905     y9i = x1i - x2i;
2906     y13r = x1r + x2r;
2907     y13i = x1i + x2i;
2908     x0r = a[4] - a[21];
2909     x0i = a[5] + a[20];
2910     x1r = wk2r * x0r - wk2i * x0i;
2911     x1i = wk2r * x0i + wk2i * x0r;
2912     x0r = a[12] - a[29];
2913     x0i = a[13] + a[28];
2914     x2r = wk2i * x0r - wk2r * x0i;
2915     x2i = wk2i * x0i + wk2r * x0r;
2916     y2r = x1r + x2r;
2917     y2i = x1i + x2i;
2918     y6r = x1r - x2r;
2919     y6i = x1i - x2i;
2920     x0r = a[4] + a[21];
2921     x0i = a[5] - a[20];
2922     x1r = wk2i * x0r - wk2r * x0i;
2923     x1i = wk2i * x0i + wk2r * x0r;
2924     x0r = a[12] + a[29];
2925     x0i = a[13] - a[28];
2926     x2r = wk2r * x0r - wk2i * x0i;
2927     x2i = wk2r * x0i + wk2i * x0r;
2928     y10r = x1r - x2r;
2929     y10i = x1i - x2i;
2930     y14r = x1r + x2r;
2931     y14i = x1i + x2i;
2932     x0r = a[6] - a[23];
2933     x0i = a[7] + a[22];
2934     x1r = wk3r * x0r - wk3i * x0i;
2935     x1i = wk3r * x0i + wk3i * x0r;
2936     x0r = a[14] - a[31];
2937     x0i = a[15] + a[30];
2938     x2r = wk1i * x0r - wk1r * x0i;
2939     x2i = wk1i * x0i + wk1r * x0r;
2940     y3r = x1r + x2r;
2941     y3i = x1i + x2i;
2942     y7r = x1r - x2r;
2943     y7i = x1i - x2i;
2944     x0r = a[6] + a[23];
2945     x0i = a[7] - a[22];
2946     x1r = wk1i * x0r + wk1r * x0i;
2947     x1i = wk1i * x0i - wk1r * x0r;
2948     x0r = a[14] + a[31];
2949     x0i = a[15] - a[30];
2950     x2r = wk3i * x0r - wk3r * x0i;
2951     x2i = wk3i * x0i + wk3r * x0r;
2952     y11r = x1r + x2r;
2953     y11i = x1i + x2i;
2954     y15r = x1r - x2r;
2955     y15i = x1i - x2i;
2956     x1r = y0r + y2r;
2957     x1i = y0i + y2i;
2958     x2r = y1r + y3r;
2959     x2i = y1i + y3i;
2960     a[0] = x1r + x2r;
2961     a[1] = x1i + x2i;
2962     a[2] = x1r - x2r;
2963     a[3] = x1i - x2i;
2964     x1r = y0r - y2r;
2965     x1i = y0i - y2i;
2966     x2r = y1r - y3r;
2967     x2i = y1i - y3i;
2968     a[4] = x1r - x2i;
2969     a[5] = x1i + x2r;
2970     a[6] = x1r + x2i;
2971     a[7] = x1i - x2r;
2972     x1r = y4r - y6i;
2973     x1i = y4i + y6r;
2974     x0r = y5r - y7i;
2975     x0i = y5i + y7r;
2976     x2r = wn4r * (x0r - x0i);
2977     x2i = wn4r * (x0i + x0r);
2978     a[8] = x1r + x2r;
2979     a[9] = x1i + x2i;
2980     a[10] = x1r - x2r;
2981     a[11] = x1i - x2i;
2982     x1r = y4r + y6i;
2983     x1i = y4i - y6r;
2984     x0r = y5r + y7i;
2985     x0i = y5i - y7r;
2986     x2r = wn4r * (x0r - x0i);
2987     x2i = wn4r * (x0i + x0r);
2988     a[12] = x1r - x2i;
2989     a[13] = x1i + x2r;
2990     a[14] = x1r + x2i;
2991     a[15] = x1i - x2r;
2992     x1r = y8r + y10r;
2993     x1i = y8i + y10i;
2994     x2r = y9r - y11r;
2995     x2i = y9i - y11i;
2996     a[16] = x1r + x2r;
2997     a[17] = x1i + x2i;
2998     a[18] = x1r - x2r;
2999     a[19] = x1i - x2i;
3000     x1r = y8r - y10r;
3001     x1i = y8i - y10i;
3002     x2r = y9r + y11r;
3003     x2i = y9i + y11i;
3004     a[20] = x1r - x2i;
3005     a[21] = x1i + x2r;
3006     a[22] = x1r + x2i;
3007     a[23] = x1i - x2r;
3008     x1r = y12r - y14i;
3009     x1i = y12i + y14r;
3010     x0r = y13r + y15i;
3011     x0i = y13i - y15r;
3012     x2r = wn4r * (x0r - x0i);
3013     x2i = wn4r * (x0i + x0r);
3014     a[24] = x1r + x2r;
3015     a[25] = x1i + x2i;
3016     a[26] = x1r - x2r;
3017     a[27] = x1i - x2i;
3018     x1r = y12r + y14i;
3019     x1i = y12i - y14r;
3020     x0r = y13r - y15i;
3021     x0i = y13i + y15r;
3022     x2r = wn4r * (x0r - x0i);
3023     x2i = wn4r * (x0i + x0r);
3024     a[28] = x1r - x2i;
3025     a[29] = x1i + x2r;
3026     a[30] = x1r + x2i;
3027     a[31] = x1i - x2r;
3028 }
3029 
3030 
cftf081(double * a,double * w)3031 void cftf081(double *a, double *w)
3032 {
3033     double wn4r, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i,
3034         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3035         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3036 
3037     wn4r = w[1];
3038     x0r = a[0] + a[8];
3039     x0i = a[1] + a[9];
3040     x1r = a[0] - a[8];
3041     x1i = a[1] - a[9];
3042     x2r = a[4] + a[12];
3043     x2i = a[5] + a[13];
3044     x3r = a[4] - a[12];
3045     x3i = a[5] - a[13];
3046     y0r = x0r + x2r;
3047     y0i = x0i + x2i;
3048     y2r = x0r - x2r;
3049     y2i = x0i - x2i;
3050     y1r = x1r - x3i;
3051     y1i = x1i + x3r;
3052     y3r = x1r + x3i;
3053     y3i = x1i - x3r;
3054     x0r = a[2] + a[10];
3055     x0i = a[3] + a[11];
3056     x1r = a[2] - a[10];
3057     x1i = a[3] - a[11];
3058     x2r = a[6] + a[14];
3059     x2i = a[7] + a[15];
3060     x3r = a[6] - a[14];
3061     x3i = a[7] - a[15];
3062     y4r = x0r + x2r;
3063     y4i = x0i + x2i;
3064     y6r = x0r - x2r;
3065     y6i = x0i - x2i;
3066     x0r = x1r - x3i;
3067     x0i = x1i + x3r;
3068     x2r = x1r + x3i;
3069     x2i = x1i - x3r;
3070     y5r = wn4r * (x0r - x0i);
3071     y5i = wn4r * (x0r + x0i);
3072     y7r = wn4r * (x2r - x2i);
3073     y7i = wn4r * (x2r + x2i);
3074     a[8] = y1r + y5r;
3075     a[9] = y1i + y5i;
3076     a[10] = y1r - y5r;
3077     a[11] = y1i - y5i;
3078     a[12] = y3r - y7i;
3079     a[13] = y3i + y7r;
3080     a[14] = y3r + y7i;
3081     a[15] = y3i - y7r;
3082     a[0] = y0r + y4r;
3083     a[1] = y0i + y4i;
3084     a[2] = y0r - y4r;
3085     a[3] = y0i - y4i;
3086     a[4] = y2r - y6i;
3087     a[5] = y2i + y6r;
3088     a[6] = y2r + y6i;
3089     a[7] = y2i - y6r;
3090 }
3091 
3092 
cftf082(double * a,double * w)3093 void cftf082(double *a, double *w)
3094 {
3095     double wn4r, wk1r, wk1i, x0r, x0i, x1r, x1i,
3096         y0r, y0i, y1r, y1i, y2r, y2i, y3r, y3i,
3097         y4r, y4i, y5r, y5i, y6r, y6i, y7r, y7i;
3098 
3099     wn4r = w[1];
3100     wk1r = w[2];
3101     wk1i = w[3];
3102     y0r = a[0] - a[9];
3103     y0i = a[1] + a[8];
3104     y1r = a[0] + a[9];
3105     y1i = a[1] - a[8];
3106     x0r = a[4] - a[13];
3107     x0i = a[5] + a[12];
3108     y2r = wn4r * (x0r - x0i);
3109     y2i = wn4r * (x0i + x0r);
3110     x0r = a[4] + a[13];
3111     x0i = a[5] - a[12];
3112     y3r = wn4r * (x0r - x0i);
3113     y3i = wn4r * (x0i + x0r);
3114     x0r = a[2] - a[11];
3115     x0i = a[3] + a[10];
3116     y4r = wk1r * x0r - wk1i * x0i;
3117     y4i = wk1r * x0i + wk1i * x0r;
3118     x0r = a[2] + a[11];
3119     x0i = a[3] - a[10];
3120     y5r = wk1i * x0r - wk1r * x0i;
3121     y5i = wk1i * x0i + wk1r * x0r;
3122     x0r = a[6] - a[15];
3123     x0i = a[7] + a[14];
3124     y6r = wk1i * x0r - wk1r * x0i;
3125     y6i = wk1i * x0i + wk1r * x0r;
3126     x0r = a[6] + a[15];
3127     x0i = a[7] - a[14];
3128     y7r = wk1r * x0r - wk1i * x0i;
3129     y7i = wk1r * x0i + wk1i * x0r;
3130     x0r = y0r + y2r;
3131     x0i = y0i + y2i;
3132     x1r = y4r + y6r;
3133     x1i = y4i + y6i;
3134     a[0] = x0r + x1r;
3135     a[1] = x0i + x1i;
3136     a[2] = x0r - x1r;
3137     a[3] = x0i - x1i;
3138     x0r = y0r - y2r;
3139     x0i = y0i - y2i;
3140     x1r = y4r - y6r;
3141     x1i = y4i - y6i;
3142     a[4] = x0r - x1i;
3143     a[5] = x0i + x1r;
3144     a[6] = x0r + x1i;
3145     a[7] = x0i - x1r;
3146     x0r = y1r - y3i;
3147     x0i = y1i + y3r;
3148     x1r = y5r - y7r;
3149     x1i = y5i - y7i;
3150     a[8] = x0r + x1r;
3151     a[9] = x0i + x1i;
3152     a[10] = x0r - x1r;
3153     a[11] = x0i - x1i;
3154     x0r = y1r + y3i;
3155     x0i = y1i - y3r;
3156     x1r = y5r + y7r;
3157     x1i = y5i + y7i;
3158     a[12] = x0r - x1i;
3159     a[13] = x0i + x1r;
3160     a[14] = x0r + x1i;
3161     a[15] = x0i - x1r;
3162 }
3163 
3164 
cftf040(double * a)3165 void cftf040(double *a)
3166 {
3167     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3168 
3169     x0r = a[0] + a[4];
3170     x0i = a[1] + a[5];
3171     x1r = a[0] - a[4];
3172     x1i = a[1] - a[5];
3173     x2r = a[2] + a[6];
3174     x2i = a[3] + a[7];
3175     x3r = a[2] - a[6];
3176     x3i = a[3] - a[7];
3177     a[0] = x0r + x2r;
3178     a[1] = x0i + x2i;
3179     a[2] = x1r - x3i;
3180     a[3] = x1i + x3r;
3181     a[4] = x0r - x2r;
3182     a[5] = x0i - x2i;
3183     a[6] = x1r + x3i;
3184     a[7] = x1i - x3r;
3185 }
3186 
3187 
cftb040(double * a)3188 void cftb040(double *a)
3189 {
3190     double x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
3191 
3192     x0r = a[0] + a[4];
3193     x0i = a[1] + a[5];
3194     x1r = a[0] - a[4];
3195     x1i = a[1] - a[5];
3196     x2r = a[2] + a[6];
3197     x2i = a[3] + a[7];
3198     x3r = a[2] - a[6];
3199     x3i = a[3] - a[7];
3200     a[0] = x0r + x2r;
3201     a[1] = x0i + x2i;
3202     a[2] = x1r + x3i;
3203     a[3] = x1i - x3r;
3204     a[4] = x0r - x2r;
3205     a[5] = x0i - x2i;
3206     a[6] = x1r - x3i;
3207     a[7] = x1i + x3r;
3208 }
3209 
3210 
cftx020(double * a)3211 void cftx020(double *a)
3212 {
3213     double x0r, x0i;
3214 
3215     x0r = a[0] - a[2];
3216     x0i = a[1] - a[3];
3217     a[0] += a[2];
3218     a[1] += a[3];
3219     a[2] = x0r;
3220     a[3] = x0i;
3221 }
3222 
3223 
rftfsub(int n,double * a,int nc,double * c)3224 void rftfsub(int n, double *a, int nc, double *c)
3225 {
3226     int j, k, kk, ks, m;
3227     double wkr, wki, xr, xi, yr, yi;
3228 
3229     m = n >> 1;
3230     ks = 2 * nc / m;
3231     kk = 0;
3232     for (j = 2; j < m; j += 2) {
3233         k = n - j;
3234         kk += ks;
3235         wkr = 0.5 - c[nc - kk];
3236         wki = c[kk];
3237         xr = a[j] - a[k];
3238         xi = a[j + 1] + a[k + 1];
3239         yr = wkr * xr - wki * xi;
3240         yi = wkr * xi + wki * xr;
3241         a[j] -= yr;
3242         a[j + 1] -= yi;
3243         a[k] += yr;
3244         a[k + 1] -= yi;
3245     }
3246 }
3247 
3248 
rftbsub(int n,double * a,int nc,double * c)3249 void rftbsub(int n, double *a, int nc, double *c)
3250 {
3251     int j, k, kk, ks, m;
3252     double wkr, wki, xr, xi, yr, yi;
3253 
3254     m = n >> 1;
3255     ks = 2 * nc / m;
3256     kk = 0;
3257     for (j = 2; j < m; j += 2) {
3258         k = n - j;
3259         kk += ks;
3260         wkr = 0.5 - c[nc - kk];
3261         wki = c[kk];
3262         xr = a[j] - a[k];
3263         xi = a[j + 1] + a[k + 1];
3264         yr = wkr * xr + wki * xi;
3265         yi = wkr * xi - wki * xr;
3266         a[j] -= yr;
3267         a[j + 1] -= yi;
3268         a[k] += yr;
3269         a[k + 1] -= yi;
3270     }
3271 }
3272 
3273 
dctsub(int n,double * a,int nc,double * c)3274 void dctsub(int n, double *a, int nc, double *c)
3275 {
3276     int j, k, kk, ks, m;
3277     double wkr, wki, xr;
3278 
3279     m = n >> 1;
3280     ks = nc / n;
3281     kk = 0;
3282     for (j = 1; j < m; j++) {
3283         k = n - j;
3284         kk += ks;
3285         wkr = c[kk] - c[nc - kk];
3286         wki = c[kk] + c[nc - kk];
3287         xr = wki * a[j] - wkr * a[k];
3288         a[j] = wkr * a[j] + wki * a[k];
3289         a[k] = xr;
3290     }
3291     a[m] *= c[0];
3292 }
3293 
3294 
dstsub(int n,double * a,int nc,double * c)3295 void dstsub(int n, double *a, int nc, double *c)
3296 {
3297     int j, k, kk, ks, m;
3298     double wkr, wki, xr;
3299 
3300     m = n >> 1;
3301     ks = nc / n;
3302     kk = 0;
3303     for (j = 1; j < m; j++) {
3304         k = n - j;
3305         kk += ks;
3306         wkr = c[kk] - c[nc - kk];
3307         wki = c[kk] + c[nc - kk];
3308         xr = wki * a[k] - wkr * a[j];
3309         a[k] = wkr * a[k] + wki * a[j];
3310         a[j] = xr;
3311     }
3312     a[m] *= c[0];
3313 }
3314 
3315