xref: /aosp_15_r20/external/pffft/pffastconv.h (revision 3f1979aa0d7ad34fcf3763de7b7b8f8cd67e5bdd)
1*3f1979aaSAndroid Build Coastguard Worker /* Copyright (c) 2019  Hayati Ayguen ( [email protected] )
2*3f1979aaSAndroid Build Coastguard Worker 
3*3f1979aaSAndroid Build Coastguard Worker    Redistribution and use of the Software in source and binary forms,
4*3f1979aaSAndroid Build Coastguard Worker    with or without modification, is permitted provided that the
5*3f1979aaSAndroid Build Coastguard Worker    following conditions are met:
6*3f1979aaSAndroid Build Coastguard Worker 
7*3f1979aaSAndroid Build Coastguard Worker    - Neither the names of PFFFT, PFFASTCONV, nor the names of its
8*3f1979aaSAndroid Build Coastguard Worker    sponsors or contributors may be used to endorse or promote products
9*3f1979aaSAndroid Build Coastguard Worker    derived from this Software without specific prior written permission.
10*3f1979aaSAndroid Build Coastguard Worker 
11*3f1979aaSAndroid Build Coastguard Worker    - Redistributions of source code must retain the above copyright
12*3f1979aaSAndroid Build Coastguard Worker    notices, this list of conditions, and the disclaimer below.
13*3f1979aaSAndroid Build Coastguard Worker 
14*3f1979aaSAndroid Build Coastguard Worker    - Redistributions in binary form must reproduce the above copyright
15*3f1979aaSAndroid Build Coastguard Worker    notice, this list of conditions, and the disclaimer below in the
16*3f1979aaSAndroid Build Coastguard Worker    documentation and/or other materials provided with the
17*3f1979aaSAndroid Build Coastguard Worker    distribution.
18*3f1979aaSAndroid Build Coastguard Worker 
19*3f1979aaSAndroid Build Coastguard Worker    THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20*3f1979aaSAndroid Build Coastguard Worker    EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF
21*3f1979aaSAndroid Build Coastguard Worker    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22*3f1979aaSAndroid Build Coastguard Worker    NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
23*3f1979aaSAndroid Build Coastguard Worker    HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL,
24*3f1979aaSAndroid Build Coastguard Worker    EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN
25*3f1979aaSAndroid Build Coastguard Worker    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
26*3f1979aaSAndroid Build Coastguard Worker    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
27*3f1979aaSAndroid Build Coastguard Worker    SOFTWARE.
28*3f1979aaSAndroid Build Coastguard Worker */
29*3f1979aaSAndroid Build Coastguard Worker 
30*3f1979aaSAndroid Build Coastguard Worker /*
31*3f1979aaSAndroid Build Coastguard Worker    PFFASTCONV : a Pretty Fast Fast Convolution
32*3f1979aaSAndroid Build Coastguard Worker 
33*3f1979aaSAndroid Build Coastguard Worker    This is basically the implementation of fast convolution,
34*3f1979aaSAndroid Build Coastguard Worker    utilizing the FFT (pffft).
35*3f1979aaSAndroid Build Coastguard Worker 
36*3f1979aaSAndroid Build Coastguard Worker    Restrictions:
37*3f1979aaSAndroid Build Coastguard Worker 
38*3f1979aaSAndroid Build Coastguard Worker    - 1D transforms only, with 32-bit single precision.
39*3f1979aaSAndroid Build Coastguard Worker 
40*3f1979aaSAndroid Build Coastguard Worker    - all (float*) pointers in the functions below are expected to
41*3f1979aaSAndroid Build Coastguard Worker    have a "simd-compatible" alignment, that is 16 bytes on x86 and
42*3f1979aaSAndroid Build Coastguard Worker    powerpc CPUs.
43*3f1979aaSAndroid Build Coastguard Worker 
44*3f1979aaSAndroid Build Coastguard Worker    You can allocate such buffers with the functions
45*3f1979aaSAndroid Build Coastguard Worker    pffft_aligned_malloc / pffft_aligned_free (or with functions such
46*3f1979aaSAndroid Build Coastguard Worker    as posix_memalign).
47*3f1979aaSAndroid Build Coastguard Worker 
48*3f1979aaSAndroid Build Coastguard Worker */
49*3f1979aaSAndroid Build Coastguard Worker 
50*3f1979aaSAndroid Build Coastguard Worker #ifndef PFFASTCONV_H
51*3f1979aaSAndroid Build Coastguard Worker #define PFFASTCONV_H
52*3f1979aaSAndroid Build Coastguard Worker 
53*3f1979aaSAndroid Build Coastguard Worker #include <stddef.h> /* for size_t */
54*3f1979aaSAndroid Build Coastguard Worker #include "pffft.h"
55*3f1979aaSAndroid Build Coastguard Worker 
56*3f1979aaSAndroid Build Coastguard Worker 
57*3f1979aaSAndroid Build Coastguard Worker #ifdef __cplusplus
58*3f1979aaSAndroid Build Coastguard Worker extern "C" {
59*3f1979aaSAndroid Build Coastguard Worker #endif
60*3f1979aaSAndroid Build Coastguard Worker 
/* Opaque handle holding the internal state of one fast-convolution setup.
 *
 * The object contains temporary data that is computed within the
 * convolution, so a single instance can't be shared by several threads.
 */
typedef struct PFFASTCONV_Setup PFFASTCONV_Setup;
66*3f1979aaSAndroid Build Coastguard Worker 
/* Option bits for the 'flags' argument of pffastconv_new_setup().
 * Every value is a distinct power of two, so options are combined with
 * bitwise OR.
 */
typedef enum {
  /* Set when input and output are complex, with real and imag part
   * interleaved in both vectors.
   * input[] has inputLen complex values: 2 * inputLen floats,
   * output[] is also written with complex values.
   * Without this flag, the input is interpreted as a real vector.
   */
  PFFASTCONV_CPLX_INP_OUT = 1,

  /* Set when filterCoeffs is complex, with real and imag part interleaved.
   * filterCoeffs[] has filterLen complex values: 2 * filterLen floats.
   * Without this flag, the filter is interpreted as a real vector.
   * ATTENTION: this is not implemented yet!
   */
  PFFASTCONV_CPLX_FILTER = 2,

  /* Set PFFASTCONV_DIRECT_INP only when the following conditions are met:
   * 1- input vector X must be aligned
   * 2- (all) inputLen <= output blockLen
   * 3- X must have a minimum length of output blockLen
   * 4- the additional samples from inputLen .. blockLen-1
   *    must contain valid, small and non-NaN samples (ideally zero)
   *
   * This option is ignored when PFFASTCONV_CPLX_INP_OUT is set.
   */
  PFFASTCONV_DIRECT_INP = 4,

  /* Set PFFASTCONV_DIRECT_OUT only when the following conditions are met:
   * 1- output vector Y must be aligned
   * 2- (all) inputLen <= output blockLen
   * 3- Y must have a minimum length of output blockLen
   *
   * This option is ignored when PFFASTCONV_CPLX_INP_OUT is set.
   */
  PFFASTCONV_DIRECT_OUT = 8,

  /* Hint to process complex data with one single FFT;
   * the default is to use 2 FFTs: one for the real part, one for the
   * imag part.
   */
  PFFASTCONV_CPLX_SINGLE_FFT = 16,

  /* Purely informational: states that the filter is symmetric
   * and that filterLen is a multiple of 8.
   */
  PFFASTCONV_SYMMETRIC = 32,

  /* filterCoeffs[] of pffastconv_new_setup are for correlation;
   * thus, do not flip them for the internal FFT calculation
   * (flipping is what the fast convolution otherwise requires).
   */
  PFFASTCONV_CORRELATION = 64

} pffastconv_flags_t;
119*3f1979aaSAndroid Build Coastguard Worker 
120*3f1979aaSAndroid Build Coastguard Worker   /*
121*3f1979aaSAndroid Build Coastguard Worker     prepare for performing fast convolution(s) of 'filterLen' with input 'blockLen'.
122*3f1979aaSAndroid Build Coastguard Worker     The output 'blockLen' might be bigger to allow the fast convolution.
123*3f1979aaSAndroid Build Coastguard Worker 
124*3f1979aaSAndroid Build Coastguard Worker     'flags' are bitmask over the 'pffastconv_flags_t' enum.
125*3f1979aaSAndroid Build Coastguard Worker 
126*3f1979aaSAndroid Build Coastguard Worker     PFFASTCONV_Setup structure can't be shared accross multiple filters
127*3f1979aaSAndroid Build Coastguard Worker     or concurrent threads.
128*3f1979aaSAndroid Build Coastguard Worker   */
129*3f1979aaSAndroid Build Coastguard Worker   PFFASTCONV_Setup * pffastconv_new_setup( const float * filterCoeffs, int filterLen, int * blockLen, int flags );
130*3f1979aaSAndroid Build Coastguard Worker 
131*3f1979aaSAndroid Build Coastguard Worker   void pffastconv_destroy_setup(PFFASTCONV_Setup *);
132*3f1979aaSAndroid Build Coastguard Worker 
133*3f1979aaSAndroid Build Coastguard Worker   /*
134*3f1979aaSAndroid Build Coastguard Worker      Perform the fast convolution.
135*3f1979aaSAndroid Build Coastguard Worker 
136*3f1979aaSAndroid Build Coastguard Worker      'input' and 'output' don't need to be aligned - unless any of
137*3f1979aaSAndroid Build Coastguard Worker      PFFASTCONV_DIRECT_INP or PFFASTCONV_DIRECT_OUT is set in 'flags'.
138*3f1979aaSAndroid Build Coastguard Worker 
139*3f1979aaSAndroid Build Coastguard Worker      inputLen > output 'blockLen' (from pffastconv_new_setup()) is allowed.
140*3f1979aaSAndroid Build Coastguard Worker      in this case, multiple FFTs are called internally, to process the
141*3f1979aaSAndroid Build Coastguard Worker      input[].
142*3f1979aaSAndroid Build Coastguard Worker 
143*3f1979aaSAndroid Build Coastguard Worker      'output' vector must have size >= (inputLen - filterLen + 1)
144*3f1979aaSAndroid Build Coastguard Worker 
145*3f1979aaSAndroid Build Coastguard Worker      set bool option 'applyFlush' to process the full input[].
146*3f1979aaSAndroid Build Coastguard Worker      with this option, 'tail samples' of input are also processed.
147*3f1979aaSAndroid Build Coastguard Worker      This might be inefficient, because the FFT is called to produce
148*3f1979aaSAndroid Build Coastguard Worker      few(er) output samples, than possible.
149*3f1979aaSAndroid Build Coastguard Worker      This option is useful to process the last samples of an input (file)
150*3f1979aaSAndroid Build Coastguard Worker      or to reduce latency.
151*3f1979aaSAndroid Build Coastguard Worker 
152*3f1979aaSAndroid Build Coastguard Worker      return value is the number of produced samples in output[].
153*3f1979aaSAndroid Build Coastguard Worker      the same amount of samples is processed from input[]. to continue
154*3f1979aaSAndroid Build Coastguard Worker      processing, the caller must save/move the remaining samples of
155*3f1979aaSAndroid Build Coastguard Worker      input[].
156*3f1979aaSAndroid Build Coastguard Worker 
157*3f1979aaSAndroid Build Coastguard Worker   */
158*3f1979aaSAndroid Build Coastguard Worker   int pffastconv_apply(PFFASTCONV_Setup * s, const float *input, int inputLen, float *output, int applyFlush);
159*3f1979aaSAndroid Build Coastguard Worker 
/* Allocate a buffer of 'nb_bytes' bytes.
 * NOTE(review): presumably returns memory with the "simd-compatible"
 * alignment required for the (float*) arguments of this API, and the
 * buffer is presumably to be released with pffastconv_free() - confirm
 * both against the implementation.
 */
void *pffastconv_malloc(size_t nb_bytes);
/* Release a buffer.
 * NOTE(review): presumably 'ptr' must come from pffastconv_malloc() -
 * confirm against the implementation.
 */
void pffastconv_free(void *ptr);
162*3f1979aaSAndroid Build Coastguard Worker 
/* Returns 4 when support for SSE/Altivec instructions was enabled when
 * building pffft.c, otherwise 1.
 *
 * Declared with (void): in C an empty parameter list "()" declares a
 * function with unspecified parameters rather than a prototype, so calls
 * with stray arguments would not be diagnosed.
 */
int pffastconv_simd_size(void);
165*3f1979aaSAndroid Build Coastguard Worker 
166*3f1979aaSAndroid Build Coastguard Worker 
167*3f1979aaSAndroid Build Coastguard Worker #ifdef __cplusplus
168*3f1979aaSAndroid Build Coastguard Worker }
169*3f1979aaSAndroid Build Coastguard Worker #endif
170*3f1979aaSAndroid Build Coastguard Worker 
171*3f1979aaSAndroid Build Coastguard Worker #endif /* PFFASTCONV_H */
172