1 /* Copyright (c) 2019 Hayati Ayguen ( [email protected] ) 2 3 Redistribution and use of the Software in source and binary forms, 4 with or without modification, is permitted provided that the 5 following conditions are met: 6 7 - Neither the names of PFFFT, PFFASTCONV, nor the names of its 8 sponsors or contributors may be used to endorse or promote products 9 derived from this Software without specific prior written permission. 10 11 - Redistributions of source code must retain the above copyright 12 notices, this list of conditions, and the disclaimer below. 13 14 - Redistributions in binary form must reproduce the above copyright 15 notice, this list of conditions, and the disclaimer below in the 16 documentation and/or other materials provided with the 17 distribution. 18 19 THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO THE WARRANTIES OF 21 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT 23 HOLDERS BE LIABLE FOR ANY CLAIM, INDIRECT, INCIDENTAL, SPECIAL, 24 EXEMPLARY, OR CONSEQUENTIAL DAMAGES OR OTHER LIABILITY, WHETHER IN AN 25 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 26 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE 27 SOFTWARE. 28 */ 29 30 /* 31 PFFASTCONV : a Pretty Fast Fast Convolution 32 33 This is basically the implementation of fast convolution, 34 utilizing the FFT (pffft). 35 36 Restrictions: 37 38 - 1D transforms only, with 32-bit single precision. 39 40 - all (float*) pointers in the functions below are expected to 41 have an "simd-compatible" alignment, that is 16 bytes on x86 and 42 powerpc CPUs. 43 44 You can allocate such buffers with the functions 45 pffft_aligned_malloc / pffft_aligned_free (or with stuff like 46 posix_memalign..) 47 48 */ 49 50 #ifndef PFFASTCONV_H 51 #define PFFASTCONV_H 52 53 #include <stddef.h> /* for size_t */ 54 #include "pffft.h" 55 56 57 #ifdef __cplusplus 58 extern "C" { 59 #endif 60 61 /* opaque struct holding internal stuff 62 this struct can't be shared by many threads as it contains 63 temporary data, computed within the convolution 64 */ 65 typedef struct PFFASTCONV_Setup PFFASTCONV_Setup; 66 67 typedef enum { 68 PFFASTCONV_CPLX_INP_OUT = 1, 69 /* set when input and output is complex, 70 * with real and imag part interleaved in both vectors. 71 * input[] has inputLen complex values: 2 * inputLen floats, 72 * output[] is also written with complex values. 73 * without this flag, the input is interpreted as real vector 74 */ 75 76 PFFASTCONV_CPLX_FILTER = 2, 77 /* set when filterCoeffs is complex, 78 * with real and imag part interleaved. 79 * filterCoeffs[] has filterLen complex values: 2 * filterLen floats 80 * without this flag, the filter is interpreted as real vector 81 * ATTENTION: this is not implemented yet! 82 */ 83 84 PFFASTCONV_DIRECT_INP = 4, 85 /* set PFFASTCONV_DIRECT_INP only, when following conditions are met: 86 * 1- input vecor X must be aligned 87 * 2- (all) inputLen <= ouput blockLen 88 * 3- X must have minimum length of output BlockLen 89 * 4- the additional samples from inputLen .. BlockLen-1 90 * must contain valid small and non-NAN samples (ideally zero) 91 * 92 * this option is ignored when PFFASTCONV_CPLX_INP_OUT is set 93 */ 94 95 PFFASTCONV_DIRECT_OUT = 8, 96 /* set PFFASTCONV_DIRECT_OUT only when following conditions are met: 97 * 1- output vector Y must be aligned 98 * 2- (all) inputLen <= ouput blockLen 99 * 3- Y must have minimum length of output blockLen 100 * 101 * this option is ignored when PFFASTCONV_CPLX_INP_OUT is set 102 */ 103 104 PFFASTCONV_CPLX_SINGLE_FFT = 16, 105 /* hint to process complex data with one single FFT; 106 * default is to use 2 FFTs: one for real part, one for imag part 107 * */ 108 109 110 PFFASTCONV_SYMMETRIC = 32, 111 /* just informal, that filter is symmetric .. and filterLen is multiple of 8 */ 112 113 PFFASTCONV_CORRELATION = 64, 114 /* filterCoeffs[] of pffastconv_new_setup are for correlation; 115 * thus, do not flip them for the internal fft calculation 116 * - as necessary for the fast convolution */ 117 118 } pffastconv_flags_t; 119 120 /* 121 prepare for performing fast convolution(s) of 'filterLen' with input 'blockLen'. 122 The output 'blockLen' might be bigger to allow the fast convolution. 123 124 'flags' are bitmask over the 'pffastconv_flags_t' enum. 125 126 PFFASTCONV_Setup structure can't be shared accross multiple filters 127 or concurrent threads. 128 */ 129 PFFASTCONV_Setup * pffastconv_new_setup( const float * filterCoeffs, int filterLen, int * blockLen, int flags ); 130 131 void pffastconv_destroy_setup(PFFASTCONV_Setup *); 132 133 /* 134 Perform the fast convolution. 135 136 'input' and 'output' don't need to be aligned - unless any of 137 PFFASTCONV_DIRECT_INP or PFFASTCONV_DIRECT_OUT is set in 'flags'. 138 139 inputLen > output 'blockLen' (from pffastconv_new_setup()) is allowed. 140 in this case, multiple FFTs are called internally, to process the 141 input[]. 142 143 'output' vector must have size >= (inputLen - filterLen + 1) 144 145 set bool option 'applyFlush' to process the full input[]. 146 with this option, 'tail samples' of input are also processed. 147 This might be inefficient, because the FFT is called to produce 148 few(er) output samples, than possible. 149 This option is useful to process the last samples of an input (file) 150 or to reduce latency. 151 152 return value is the number of produced samples in output[]. 153 the same amount of samples is processed from input[]. to continue 154 processing, the caller must save/move the remaining samples of 155 input[]. 156 157 */ 158 int pffastconv_apply(PFFASTCONV_Setup * s, const float *input, int inputLen, float *output, int applyFlush); 159 160 void *pffastconv_malloc(size_t nb_bytes); 161 void pffastconv_free(void *); 162 163 /* return 4 or 1 wether support SSE/Altivec instructions was enabled when building pffft.c */ 164 int pffastconv_simd_size(); 165 166 167 #ifdef __cplusplus 168 } 169 #endif 170 171 #endif /* PFFASTCONV_H */ 172