xref: /nrf52832-nimble/rt-thread/components/CMSIS/Include/arm_math.h (revision 042d53a763ad75cb1465103098bb88c245d95138)
1 /* ----------------------------------------------------------------------
2 * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
3 *
4 * $Date:        17. January 2013
5 * $Revision:    V1.4.1
6 *
7 * Project:      CMSIS DSP Library
8 * Title:        arm_math.h
9 *
10 * Description:  Public header file for CMSIS DSP Library
11 *
12 * Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *   - Redistributions of source code must retain the above copyright
18 *     notice, this list of conditions and the following disclaimer.
19 *   - Redistributions in binary form must reproduce the above copyright
20 *     notice, this list of conditions and the following disclaimer in
21 *     the documentation and/or other materials provided with the
22 *     distribution.
23 *   - Neither the name of ARM LIMITED nor the names of its contributors
24 *     may be used to endorse or promote products derived from this
25 *     software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
28 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
29 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
30 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
31 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
32 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
33 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
34 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
35 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
37 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 * POSSIBILITY OF SUCH DAMAGE.
39  * -------------------------------------------------------------------- */
40 
41 /**
42    \mainpage CMSIS DSP Software Library
43    *
44    * <b>Introduction</b>
45    *
46    * This user manual describes the CMSIS DSP software library,
47    * a suite of common signal processing functions for use on Cortex-M processor based devices.
48    *
49    * The library is divided into a number of functions each covering a specific category:
50    * - Basic math functions
51    * - Fast math functions
52    * - Complex math functions
53    * - Filters
54    * - Matrix functions
55    * - Transforms
56    * - Motor control functions
57    * - Statistical functions
58    * - Support functions
59    * - Interpolation functions
60    *
61    * The library has separate functions for operating on 8-bit integers, 16-bit integers,
62    * 32-bit integer and 32-bit floating-point values.
63    *
64    * <b>Using the Library</b>
65    *
66    * The library installer contains prebuilt versions of the libraries in the <code>Lib</code> folder.
67    * - arm_cortexM4lf_math.lib (Little endian and Floating Point Unit on Cortex-M4)
68    * - arm_cortexM4bf_math.lib (Big endian and Floating Point Unit on Cortex-M4)
69    * - arm_cortexM4l_math.lib (Little endian on Cortex-M4)
70    * - arm_cortexM4b_math.lib (Big endian on Cortex-M4)
71    * - arm_cortexM3l_math.lib (Little endian on Cortex-M3)
72    * - arm_cortexM3b_math.lib (Big endian on Cortex-M3)
73    * - arm_cortexM0l_math.lib (Little endian on Cortex-M0)
74    * - arm_cortexM0b_math.lib (Big endian on Cortex-M0)
75    *
76    * The library functions are declared in the public file <code>arm_math.h</code> which is placed in the <code>Include</code> folder.
77    * Simply include this file and link the appropriate library in the application and begin calling the library functions. The Library supports single
78    * public header file <code> arm_math.h</code> for Cortex-M4/M3/M0 with little endian and big endian. Same header file will be used for floating point unit(FPU) variants.
79    * Define the appropriate pre processor MACRO ARM_MATH_CM4 or  ARM_MATH_CM3 or
80    * ARM_MATH_CM0 or ARM_MATH_CM0PLUS depending on the target processor in the application.
81    *
82    * <b>Examples</b>
83    *
84    * The library ships with a number of examples which demonstrate how to use the library functions.
85    *
86    * <b>Toolchain Support</b>
87    *
88    * The library has been developed and tested with MDK-ARM version 4.60.
89    * The library is being tested in GCC and IAR toolchains and updates on this activity will be made available shortly.
90    *
91    * <b>Building the Library</b>
92    *
93    * The library installer contains project files to re build libraries on MDK Tool chain in the <code>CMSIS\\DSP_Lib\\Source\\ARM</code> folder.
94    * - arm_cortexM0b_math.uvproj
95    * - arm_cortexM0l_math.uvproj
96    * - arm_cortexM3b_math.uvproj
97    * - arm_cortexM3l_math.uvproj
98    * - arm_cortexM4b_math.uvproj
99    * - arm_cortexM4l_math.uvproj
100    * - arm_cortexM4bf_math.uvproj
101    * - arm_cortexM4lf_math.uvproj
102    *
103    *
104    * The project can be built by opening the appropriate project in MDK-ARM 4.60 chain and defining the optional pre processor MACROs detailed above.
105    *
106    * <b>Pre-processor Macros</b>
107    *
108    * Each library project has different pre-processor macros.
109    *
110    * - UNALIGNED_SUPPORT_DISABLE:
111    *
112    * Define macro UNALIGNED_SUPPORT_DISABLE if the silicon does not support unaligned memory access.
113    *
114    * - ARM_MATH_BIG_ENDIAN:
115    *
116    * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. By default library builds for little endian targets.
117    *
118    * - ARM_MATH_MATRIX_CHECK:
119    *
120    * Define macro ARM_MATH_MATRIX_CHECK for checking on the input and output sizes of matrices
121    *
122    * - ARM_MATH_ROUNDING:
123    *
124    * Define macro ARM_MATH_ROUNDING for rounding on support functions
125    *
126    * - ARM_MATH_CMx:
127    *
128    * Define macro ARM_MATH_CM4 for building the library on Cortex-M4 target, ARM_MATH_CM3 for building library on Cortex-M3 target
129    * and ARM_MATH_CM0 for building library on cortex-M0 target, ARM_MATH_CM0PLUS for building library on cortex-M0+ target.
130    *
131    * - __FPU_PRESENT:
132    *
133    * Initialize macro __FPU_PRESENT = 1 when building on FPU supported Targets. Enable this macro for M4bf and M4lf libraries
134    *
135    * <b>Copyright Notice</b>
136    *
137    * Copyright (C) 2010-2013 ARM Limited. All rights reserved.
138    */
139 
140 
141 /**
142  * @defgroup groupMath Basic Math Functions
143  */
144 
145 /**
146  * @defgroup groupFastMath Fast Math Functions
147  * This set of functions provides a fast approximation to sine, cosine, and square root.
148  * As compared to most of the other functions in the CMSIS math library, the fast math functions
149  * operate on individual values and not arrays.
150  * There are separate functions for Q15, Q31, and floating-point data.
151  *
152  */
153 
154 /**
155  * @defgroup groupCmplxMath Complex Math Functions
156  * This set of functions operates on complex data vectors.
157  * The data in the complex arrays is stored in an interleaved fashion
158  * (real, imag, real, imag, ...).
159  * In the API functions, the number of samples in a complex array refers
160  * to the number of complex values; the array contains twice this number of
161  * real values.
162  */
163 
164 /**
165  * @defgroup groupFilters Filtering Functions
166  */
167 
168 /**
169  * @defgroup groupMatrix Matrix Functions
170  *
171  * This set of functions provides basic matrix math operations.
172  * The functions operate on matrix data structures.  For example,
173  * the type
174  * definition for the floating-point matrix structure is shown
175  * below:
176  * <pre>
177  *     typedef struct
178  *     {
179  *       uint16_t numRows;     // number of rows of the matrix.
180  *       uint16_t numCols;     // number of columns of the matrix.
181  *       float32_t *pData;     // points to the data of the matrix.
182  *     } arm_matrix_instance_f32;
183  * </pre>
184  * There are similar definitions for Q15 and Q31 data types.
185  *
186  * The structure specifies the size of the matrix and then points to
187  * an array of data.  The array is of size <code>numRows X numCols</code>
188  * and the values are arranged in row order.  That is, the
189  * matrix element (i, j) is stored at:
190  * <pre>
191  *     pData[i*numCols + j]
192  * </pre>
193  *
194  * \par Init Functions
195  * There is an associated initialization function for each type of matrix
196  * data structure.
197  * The initialization function sets the values of the internal structure fields.
198  * Refer to the function <code>arm_mat_init_f32()</code>, <code>arm_mat_init_q31()</code>
199  * and <code>arm_mat_init_q15()</code> for floating-point, Q31 and Q15 types,  respectively.
200  *
201  * \par
202  * Use of the initialization function is optional. However, if initialization function is used
203  * then the instance structure cannot be placed into a const data section.
204  * To place the instance structure in a const data
205  * section, manually initialize the data structure.  For example:
206  * <pre>
207  * <code>arm_matrix_instance_f32 S = {nRows, nColumns, pData};</code>
208  * <code>arm_matrix_instance_q31 S = {nRows, nColumns, pData};</code>
209  * <code>arm_matrix_instance_q15 S = {nRows, nColumns, pData};</code>
210  * </pre>
211  * where <code>nRows</code> specifies the number of rows, <code>nColumns</code>
212  * specifies the number of columns, and <code>pData</code> points to the
213  * data array.
214  *
215  * \par Size Checking
216  * By default all of the matrix functions perform size checking on the input and
217  * output matrices.  For example, the matrix addition function verifies that the
218  * two input matrices and the output matrix all have the same number of rows and
219  * columns.  If the size check fails the functions return:
220  * <pre>
221  *     ARM_MATH_SIZE_MISMATCH
222  * </pre>
223  * Otherwise the functions return
224  * <pre>
225  *     ARM_MATH_SUCCESS
226  * </pre>
227  * There is some overhead associated with this matrix size checking.
228  * The matrix size checking is enabled via the \#define
229  * <pre>
230  *     ARM_MATH_MATRIX_CHECK
231  * </pre>
232  * within the library project settings.  By default this macro is defined
233  * and size checking is enabled.  By changing the project settings and
234  * undefining this macro size checking is eliminated and the functions
235  * run a bit faster.  With size checking disabled the functions always
236  * return <code>ARM_MATH_SUCCESS</code>.
237  */
238 
239 /**
240  * @defgroup groupTransforms Transform Functions
241  */
242 
243 /**
244  * @defgroup groupController Controller Functions
245  */
246 
247 /**
248  * @defgroup groupStats Statistics Functions
249  */
250 /**
251  * @defgroup groupSupport Support Functions
252  */
253 
254 /**
255  * @defgroup groupInterpolation Interpolation Functions
256  * These functions perform 1- and 2-dimensional interpolation of data.
257  * Linear interpolation is used for 1-dimensional data and
258  * bilinear interpolation is used for 2-dimensional data.
259  */
260 
261 /**
262  * @defgroup groupExamples Examples
263  */
264 #ifndef _ARM_MATH_H
265 #define _ARM_MATH_H
266 
267 #define __CMSIS_GENERIC         /* disable NVIC and Systick functions */
268 
269 #if defined (ARM_MATH_CM4)
270 #include "core_cm4.h"
271 #elif defined (ARM_MATH_CM3)
272 #include "core_cm3.h"
273 #elif defined (ARM_MATH_CM0)
274 #include "core_cm0.h"
275 #define ARM_MATH_CM0_FAMILY
276 #elif defined (ARM_MATH_CM0PLUS)
277 #include "core_cm0plus.h"
278 #define ARM_MATH_CM0_FAMILY
279 #else
280 #include "ARMCM4.h"
281 #warning "Define either ARM_MATH_CM4 OR ARM_MATH_CM3...By Default building on ARM_MATH_CM4....."
282 #endif
283 
284 #undef  __CMSIS_GENERIC         /* enable NVIC and Systick functions */
285 #include "string.h"
286 #include "math.h"
287 #ifdef	__cplusplus
288 extern "C"
289 {
290 #endif
291 
292 
/**
 * @brief Constants used by the reciprocal calculation in Normalized LMS.
 *
 * INDEX_MASK keeps the low 6 bits of a value; the reciprocal helpers in this
 * header use it to bound the lookup-table index.
 */
#define DELTA_Q31           (0x100)
#define DELTA_Q15           0x5
#define INDEX_MASK          0x0000003F
#ifndef PI
#define PI                  3.14159265358979f
#endif

/**
 * @brief Constants used by the SINE and COSINE fast math approximations.
 */
#define TABLE_SIZE          256
#define TABLE_SPACING_Q31   0x800000
#define TABLE_SPACING_Q15   0x80

/**
 * @brief Constant used by the SINE and COSINE controller functions.
 *
 * 1.31 (q31) fixed-point value of 2/360: the range -1 to +1 is divided into
 * 360 values, so the spacing between two neighbouring values is 2/360.
 */
#define INPUT_SPACING       0xB60B61
318 
/**
 * @brief Alignment qualifier tied to unaligned-access support.
 *
 * ALIGN4 expands to nothing unless UNALIGNED_SUPPORT_DISABLE is defined.
 * When it is defined, ALIGN4 expands to the compiler's 4-byte alignment
 * qualifier: the GNU attribute for __GNUC__, __align(4) otherwise.
 */
#if defined (UNALIGNED_SUPPORT_DISABLE)
  #if defined (__GNUC__)
    #define ALIGN4 __attribute__((aligned(4)))
  #else
    #define ALIGN4 __align(4)
  #endif
#else
  #define ALIGN4
#endif  /* defined (UNALIGNED_SUPPORT_DISABLE) */
331 
/**
 * @brief Error status returned by some functions in the library.
 *
 * Zero means success; every failure code is a distinct negative value.
 */
typedef enum
{
  /** No error */
  ARM_MATH_SUCCESS = 0,
  /** One or more arguments are incorrect */
  ARM_MATH_ARGUMENT_ERROR = -1,
  /** Length of data buffer is incorrect */
  ARM_MATH_LENGTH_ERROR = -2,
  /** Size of matrices is not compatible with the operation. */
  ARM_MATH_SIZE_MISMATCH = -3,
  /** Not-a-number (NaN) or infinity is generated */
  ARM_MATH_NANINF = -4,
  /** Generated by matrix inversion if the input matrix is singular and cannot be inverted. */
  ARM_MATH_SINGULAR = -5,
  /** Test Failed */
  ARM_MATH_TEST_FAILURE = -6
} arm_status;
346 
/*
 * Scalar data types used throughout the library.
 */

/** @brief 8-bit fractional data type in 1.7 format. */
typedef int8_t q7_t;

/** @brief 16-bit fractional data type in 1.15 format. */
typedef int16_t q15_t;

/** @brief 32-bit fractional data type in 1.31 format. */
typedef int32_t q31_t;

/** @brief 64-bit fractional data type in 1.63 format. */
typedef int64_t q63_t;

/** @brief 32-bit floating-point type definition. */
typedef float float32_t;

/** @brief 64-bit floating-point type definition. */
typedef double float64_t;
376 
/**
 * @brief Compiler-specific definitions used to read/write two 16-bit values
 *        with a single 32-bit access.
 *
 * __SIMD32_TYPE is the 32-bit access type (declared __packed for the ARM and
 * IAR compilers) and CMSIS_UNUSED is the "unused" attribute where the
 * compiler offers one. Any other compiler stops the build.
 */
#if defined (__CC_ARM)
  #define CMSIS_UNUSED    __attribute__((unused))
  #define __SIMD32_TYPE   int32_t __packed
#elif defined (__ICCARM__)
  #define CMSIS_UNUSED
  #define __SIMD32_TYPE   int32_t __packed
#elif defined (__GNUC__)
  #define CMSIS_UNUSED    __attribute__((unused))
  #define __SIMD32_TYPE   int32_t
#else
  #error Unknown compiler
#endif

/* View the pointer variable addr as a pointer to __SIMD32_TYPE (usable as an lvalue) */
#define __SIMD32(addr)  (*(__SIMD32_TYPE **) & (addr))

/* Convert the pointer value addr to a pointer to __SIMD32_TYPE */
#define __SIMD32_CONST(addr)  ((__SIMD32_TYPE *)(addr))

/* Access the __SIMD32_TYPE object that addr points to */
#define _SIMD32_OFFSET(addr)  (*(__SIMD32_TYPE *)  (addr))

/* View the pointer variable addr as a pointer to int64_t (usable as an lvalue) */
#define __SIMD64(addr)  (*(int64_t **) & (addr))
399 
400 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
/**
 * @brief Macros that pack two 16-bit values into one 32-bit word.
 *
 * __PKHBT keeps bits [15:0] of ARG1 and takes bits [31:16] from ARG2 shifted
 * left by ARG3. __PKHTB keeps bits [31:16] of ARG1 and takes bits [15:0] from
 * ARG2 shifted right by ARG3.
 *
 * Every argument, including the shift count ARG3, is parenthesised so that an
 * expression can be passed without changing the meaning of the macro.
 */
#define __PKHBT(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<      0) & (int32_t)0x0000FFFF) | \
                                         (((int32_t)(ARG2) << (ARG3)) & (int32_t)0xFFFF0000)  )
#define __PKHTB(ARG1, ARG2, ARG3)      ( (((int32_t)(ARG1) <<      0) & (int32_t)0xFFFF0000) | \
                                         (((int32_t)(ARG2) >> (ARG3)) & (int32_t)0x0000FFFF)  )
408 
409 #endif
410 
411 
/**
 * @brief Macro that packs four 8-bit values into one 32-bit word.
 *
 * On little endian builds (the default) v0 lands in bits [7:0] and v3 in
 * bits [31:24]; with ARM_MATH_BIG_ENDIAN defined the order is reversed.
 */
#if defined (ARM_MATH_BIG_ENDIAN)

#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v3) <<  0) & (int32_t)0x000000FF) | \
                                (((int32_t)(v2) <<  8) & (int32_t)0x0000FF00) | \
                                (((int32_t)(v1) << 16) & (int32_t)0x00FF0000) | \
                                (((int32_t)(v0) << 24) & (int32_t)0xFF000000)  )

#else

#define __PACKq7(v0,v1,v2,v3) ( (((int32_t)(v0) <<  0) & (int32_t)0x000000FF) | \
                                (((int32_t)(v1) <<  8) & (int32_t)0x0000FF00) | \
                                (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | \
                                (((int32_t)(v3) << 24) & (int32_t)0xFF000000)  )

#endif  /* defined (ARM_MATH_BIG_ENDIAN) */
429 
430 
431   /**
432    * @brief Clips Q63 to Q31 values.
433    */
434   static __INLINE q31_t clip_q63_to_q31(
435   q63_t x)
436   {
437     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
438       ((0x7FFFFFFF ^ ((q31_t) (x >> 63)))) : (q31_t) x;
439   }
440 
441   /**
442    * @brief Clips Q63 to Q15 values.
443    */
444   static __INLINE q15_t clip_q63_to_q15(
445   q63_t x)
446   {
447     return ((q31_t) (x >> 32) != ((q31_t) x >> 31)) ?
448       ((0x7FFF ^ ((q15_t) (x >> 63)))) : (q15_t) (x >> 15);
449   }
450 
451   /**
452    * @brief Clips Q31 to Q7 values.
453    */
454   static __INLINE q7_t clip_q31_to_q7(
455   q31_t x)
456   {
457     return ((q31_t) (x >> 24) != ((q31_t) x >> 23)) ?
458       ((0x7F ^ ((q7_t) (x >> 31)))) : (q7_t) x;
459   }
460 
461   /**
462    * @brief Clips Q31 to Q15 values.
463    */
464   static __INLINE q15_t clip_q31_to_q15(
465   q31_t x)
466   {
467     return ((q31_t) (x >> 16) != ((q31_t) x >> 15)) ?
468       ((0x7FFF ^ ((q15_t) (x >> 31)))) : (q15_t) x;
469   }
470 
471   /**
472    * @brief Multiplies 32 X 64 and returns 32 bit result in 2.30 format.
473    */
474 
475   static __INLINE q63_t mult32x64(
476   q63_t x,
477   q31_t y)
478   {
479     return ((((q63_t) (x & 0x00000000FFFFFFFF) * y) >> 32) +
480             (((q63_t) (x >> 32) * y)));
481   }
482 
483 
#if defined (ARM_MATH_CM0_FAMILY) && defined ( __CC_ARM   )
#define __CLZ __clz
#endif

#if defined (ARM_MATH_CM0_FAMILY) && ((defined (__ICCARM__)) ||(defined (__GNUC__)) || defined (__TASKING__) )

  static __INLINE uint32_t __CLZ(
  q31_t data);


  /**
   * @brief Software count of leading zero bits, used for the Cortex-M0 family
   *        with the IAR, GNU and TASKING compilers.
   * @param[in] data  value whose leading zero bits are counted.
   * @return number of zero bits above the most significant set bit; 32 when
   *         data is 0.
   */
  static __INLINE uint32_t __CLZ(
  q31_t data)
  {
    uint32_t count = 0u;
    uint32_t mask = 0x80000000u;

    /* Stop once the mask has been shifted out, so data == 0 ends with count == 32
       instead of looping forever */
    while((mask != 0u) && (((uint32_t) data & mask) == 0u))
    {
      count += 1u;
      mask = mask >> 1u;
    }

    return (count);

  }

#endif
511 
512   /**
513    * @brief Function to Calculates 1/in (reciprocal) value of Q31 Data type.
514    */
515 
516   static __INLINE uint32_t arm_recip_q31(
517   q31_t in,
518   q31_t * dst,
519   q31_t * pRecipTable)
520   {
521 
522     uint32_t out, tempVal;
523     uint32_t index, i;
524     uint32_t signBits;
525 
526     if(in > 0)
527     {
528       signBits = __CLZ(in) - 1;
529     }
530     else
531     {
532       signBits = __CLZ(-in) - 1;
533     }
534 
535     /* Convert input sample to 1.31 format */
536     in = in << signBits;
537 
538     /* calculation of index for initial approximated Val */
539     index = (uint32_t) (in >> 24u);
540     index = (index & INDEX_MASK);
541 
542     /* 1.31 with exp 1 */
543     out = pRecipTable[index];
544 
545     /* calculation of reciprocal value */
546     /* running approximation for two iterations */
547     for (i = 0u; i < 2u; i++)
548     {
549       tempVal = (q31_t) (((q63_t) in * out) >> 31u);
550       tempVal = 0x7FFFFFFF - tempVal;
551       /*      1.31 with exp 1 */
552       //out = (q31_t) (((q63_t) out * tempVal) >> 30u);
553       out = (q31_t) clip_q63_to_q31(((q63_t) out * tempVal) >> 30u);
554     }
555 
556     /* write output */
557     *dst = out;
558 
559     /* return num of signbits of out = 1/in value */
560     return (signBits + 1u);
561 
562   }
563 
564   /**
565    * @brief Function to Calculates 1/in (reciprocal) value of Q15 Data type.
566    */
567   static __INLINE uint32_t arm_recip_q15(
568   q15_t in,
569   q15_t * dst,
570   q15_t * pRecipTable)
571   {
572 
573     uint32_t out = 0, tempVal = 0;
574     uint32_t index = 0, i = 0;
575     uint32_t signBits = 0;
576 
577     if(in > 0)
578     {
579       signBits = __CLZ(in) - 17;
580     }
581     else
582     {
583       signBits = __CLZ(-in) - 17;
584     }
585 
586     /* Convert input sample to 1.15 format */
587     in = in << signBits;
588 
589     /* calculation of index for initial approximated Val */
590     index = in >> 8;
591     index = (index & INDEX_MASK);
592 
593     /*      1.15 with exp 1  */
594     out = pRecipTable[index];
595 
596     /* calculation of reciprocal value */
597     /* running approximation for two iterations */
598     for (i = 0; i < 2; i++)
599     {
600       tempVal = (q15_t) (((q31_t) in * out) >> 15);
601       tempVal = 0x7FFF - tempVal;
602       /*      1.15 with exp 1 */
603       out = (q15_t) (((q31_t) out * tempVal) >> 14);
604     }
605 
606     /* write output */
607     *dst = out;
608 
609     /* return num of signbits of out = 1/in value */
610     return (signBits + 1);
611 
612   }
613 
614 
/*
 * @brief C custom defined intrinsic function for only M0 processors
 */
#if defined(ARM_MATH_CM0_FAMILY)

  /*
   * @brief C custom defined SSAT (signed saturate) for M0 processors
   * @param[in] x  value to saturate.
   * @param[in] y  width in bits of the signed range to saturate to.
   * @return x limited to [-2^(y-1), 2^(y-1) - 1] for y in 1..31. For any other
   *         y the value is returned unchanged: a 32-bit value already fits a
   *         32-bit range, and y == 0 or y > 32 is not a valid width.
   */
  static __INLINE q31_t __SSAT(
  q31_t x,
  uint32_t y)
  {
    int32_t posMax, negMin;

    /* Guard the widths for which the limits cannot be formed in 32 bits;
       this also avoids a runaway loop count when y is 0 */
    if((y == 0u) || (y > 31u))
    {
      return (x);
    }

    posMax = (int32_t) (((uint32_t) 1u << (y - 1u)) - 1u);    /* 2^(y-1) - 1 */
    negMin = -posMax - 1;                                     /* -2^(y-1)    */

    if(x > posMax)
    {
      x = posMax;
    }
    else if(x < negMin)
    {
      x = negMin;
    }

    return (x);
  }

#endif /* end of ARM_MATH_CM0_FAMILY */
657 
658 
659 
660   /*
661    * @brief C custom defined intrinsic function for M3 and M0 processors
662    */
663 #if defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY)
664 
665   /*
666    * @brief C custom defined QADD8 for M3 and M0 processors
667    */
668   static __INLINE q31_t __QADD8(
669   q31_t x,
670   q31_t y)
671   {
672 
673     q31_t sum;
674     q7_t r, s, t, u;
675 
676     r = (q7_t) x;
677     s = (q7_t) y;
678 
679     r = __SSAT((q31_t) (r + s), 8);
680     s = __SSAT(((q31_t) (((x << 16) >> 24) + ((y << 16) >> 24))), 8);
681     t = __SSAT(((q31_t) (((x << 8) >> 24) + ((y << 8) >> 24))), 8);
682     u = __SSAT(((q31_t) ((x >> 24) + (y >> 24))), 8);
683 
684     sum =
685       (((q31_t) u << 24) & 0xFF000000) | (((q31_t) t << 16) & 0x00FF0000) |
686       (((q31_t) s << 8) & 0x0000FF00) | (r & 0x000000FF);
687 
688     return sum;
689 
690   }
691 
692   /*
693    * @brief C custom defined QSUB8 for M3 and M0 processors
694    */
695   static __INLINE q31_t __QSUB8(
696   q31_t x,
697   q31_t y)
698   {
699 
700     q31_t sum;
701     q31_t r, s, t, u;
702 
703     r = (q7_t) x;
704     s = (q7_t) y;
705 
706     r = __SSAT((r - s), 8);
707     s = __SSAT(((q31_t) (((x << 16) >> 24) - ((y << 16) >> 24))), 8) << 8;
708     t = __SSAT(((q31_t) (((x << 8) >> 24) - ((y << 8) >> 24))), 8) << 16;
709     u = __SSAT(((q31_t) ((x >> 24) - (y >> 24))), 8) << 24;
710 
711     sum =
712       (u & 0xFF000000) | (t & 0x00FF0000) | (s & 0x0000FF00) | (r &
713                                                                 0x000000FF);
714 
715     return sum;
716   }
717 
718   /*
719    * @brief C custom defined QADD16 for M3 and M0 processors
720    */
721 
722   /*
723    * @brief C custom defined QADD16 for M3 and M0 processors
724    */
725   static __INLINE q31_t __QADD16(
726   q31_t x,
727   q31_t y)
728   {
729 
730     q31_t sum;
731     q31_t r, s;
732 
733     r = (short) x;
734     s = (short) y;
735 
736     r = __SSAT(r + s, 16);
737     s = __SSAT(((q31_t) ((x >> 16) + (y >> 16))), 16) << 16;
738 
739     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
740 
741     return sum;
742 
743   }
744 
745   /*
746    * @brief C custom defined SHADD16 for M3 and M0 processors
747    */
748   static __INLINE q31_t __SHADD16(
749   q31_t x,
750   q31_t y)
751   {
752 
753     q31_t sum;
754     q31_t r, s;
755 
756     r = (short) x;
757     s = (short) y;
758 
759     r = ((r >> 1) + (s >> 1));
760     s = ((q31_t) ((x >> 17) + (y >> 17))) << 16;
761 
762     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
763 
764     return sum;
765 
766   }
767 
768   /*
769    * @brief C custom defined QSUB16 for M3 and M0 processors
770    */
771   static __INLINE q31_t __QSUB16(
772   q31_t x,
773   q31_t y)
774   {
775 
776     q31_t sum;
777     q31_t r, s;
778 
779     r = (short) x;
780     s = (short) y;
781 
782     r = __SSAT(r - s, 16);
783     s = __SSAT(((q31_t) ((x >> 16) - (y >> 16))), 16) << 16;
784 
785     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
786 
787     return sum;
788   }
789 
790   /*
791    * @brief C custom defined SHSUB16 for M3 and M0 processors
792    */
793   static __INLINE q31_t __SHSUB16(
794   q31_t x,
795   q31_t y)
796   {
797 
798     q31_t diff;
799     q31_t r, s;
800 
801     r = (short) x;
802     s = (short) y;
803 
804     r = ((r >> 1) - (s >> 1));
805     s = (((x >> 17) - (y >> 17)) << 16);
806 
807     diff = (s & 0xFFFF0000) | (r & 0x0000FFFF);
808 
809     return diff;
810   }
811 
812   /*
813    * @brief C custom defined QASX for M3 and M0 processors
814    */
815   static __INLINE q31_t __QASX(
816   q31_t x,
817   q31_t y)
818   {
819 
820     q31_t sum = 0;
821 
822     sum =
823       ((sum +
824         clip_q31_to_q15((q31_t) ((short) (x >> 16) + (short) y))) << 16) +
825       clip_q31_to_q15((q31_t) ((short) x - (short) (y >> 16)));
826 
827     return sum;
828   }
829 
830   /*
831    * @brief C custom defined SHASX for M3 and M0 processors
832    */
833   static __INLINE q31_t __SHASX(
834   q31_t x,
835   q31_t y)
836   {
837 
838     q31_t sum;
839     q31_t r, s;
840 
841     r = (short) x;
842     s = (short) y;
843 
844     r = ((r >> 1) - (y >> 17));
845     s = (((x >> 17) + (s >> 1)) << 16);
846 
847     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
848 
849     return sum;
850   }
851 
852 
853   /*
854    * @brief C custom defined QSAX for M3 and M0 processors
855    */
856   static __INLINE q31_t __QSAX(
857   q31_t x,
858   q31_t y)
859   {
860 
861     q31_t sum = 0;
862 
863     sum =
864       ((sum +
865         clip_q31_to_q15((q31_t) ((short) (x >> 16) - (short) y))) << 16) +
866       clip_q31_to_q15((q31_t) ((short) x + (short) (y >> 16)));
867 
868     return sum;
869   }
870 
871   /*
872    * @brief C custom defined SHSAX for M3 and M0 processors
873    */
874   static __INLINE q31_t __SHSAX(
875   q31_t x,
876   q31_t y)
877   {
878 
879     q31_t sum;
880     q31_t r, s;
881 
882     r = (short) x;
883     s = (short) y;
884 
885     r = ((r >> 1) + (y >> 17));
886     s = (((x >> 17) - (s >> 1)) << 16);
887 
888     sum = (s & 0xFFFF0000) | (r & 0x0000FFFF);
889 
890     return sum;
891   }
892 
893   /*
894    * @brief C custom defined SMUSDX for M3 and M0 processors
895    */
896   static __INLINE q31_t __SMUSDX(
897   q31_t x,
898   q31_t y)
899   {
900 
901     return ((q31_t) (((short) x * (short) (y >> 16)) -
902                      ((short) (x >> 16) * (short) y)));
903   }
904 
905   /*
906    * @brief C custom defined SMUADX for M3 and M0 processors
907    */
908   static __INLINE q31_t __SMUADX(
909   q31_t x,
910   q31_t y)
911   {
912 
913     return ((q31_t) (((short) x * (short) (y >> 16)) +
914                      ((short) (x >> 16) * (short) y)));
915   }
916 
917   /*
918    * @brief C custom defined QADD for M3 and M0 processors
919    */
920   static __INLINE q31_t __QADD(
921   q31_t x,
922   q31_t y)
923   {
924     return clip_q63_to_q31((q63_t) x + y);
925   }
926 
927   /*
928    * @brief C custom defined QSUB for M3 and M0 processors
929    */
930   static __INLINE q31_t __QSUB(
931   q31_t x,
932   q31_t y)
933   {
934     return clip_q63_to_q31((q63_t) x - y);
935   }
936 
937   /*
938    * @brief C custom defined SMLAD for M3 and M0 processors
939    */
940   static __INLINE q31_t __SMLAD(
941   q31_t x,
942   q31_t y,
943   q31_t sum)
944   {
945 
946     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
947             ((short) x * (short) y));
948   }
949 
950   /*
951    * @brief C custom defined SMLADX for M3 and M0 processors
952    */
953   static __INLINE q31_t __SMLADX(
954   q31_t x,
955   q31_t y,
956   q31_t sum)
957   {
958 
959     return (sum + ((short) (x >> 16) * (short) (y)) +
960             ((short) x * (short) (y >> 16)));
961   }
962 
963   /*
964    * @brief C custom defined SMLSDX for M3 and M0 processors
965    */
966   static __INLINE q31_t __SMLSDX(
967   q31_t x,
968   q31_t y,
969   q31_t sum)
970   {
971 
972     return (sum - ((short) (x >> 16) * (short) (y)) +
973             ((short) x * (short) (y >> 16)));
974   }
975 
976   /*
977    * @brief C custom defined SMLALD for M3 and M0 processors
978    */
979   static __INLINE q63_t __SMLALD(
980   q31_t x,
981   q31_t y,
982   q63_t sum)
983   {
984 
985     return (sum + ((short) (x >> 16) * (short) (y >> 16)) +
986             ((short) x * (short) y));
987   }
988 
989   /*
990    * @brief C custom defined SMLALDX for M3 and M0 processors
991    */
992   static __INLINE q63_t __SMLALDX(
993   q31_t x,
994   q31_t y,
995   q63_t sum)
996   {
997 
998     return (sum + ((short) (x >> 16) * (short) y)) +
999       ((short) x * (short) (y >> 16));
1000   }
1001 
1002   /*
1003    * @brief C custom defined SMUAD for M3 and M0 processors
1004    */
1005   static __INLINE q31_t __SMUAD(
1006   q31_t x,
1007   q31_t y)
1008   {
1009 
1010     return (((x >> 16) * (y >> 16)) +
1011             (((x << 16) >> 16) * ((y << 16) >> 16)));
1012   }
1013 
1014   /*
1015    * @brief C custom defined SMUSD for M3 and M0 processors
1016    */
1017   static __INLINE q31_t __SMUSD(
1018   q31_t x,
1019   q31_t y)
1020   {
1021 
1022     return (-((x >> 16) * (y >> 16)) +
1023             (((x << 16) >> 16) * ((y << 16) >> 16)));
1024   }
1025 
1026 
1027   /*
1028    * @brief C custom defined SXTB16 for M3 and M0 processors
1029    */
1030   static __INLINE q31_t __SXTB16(
1031   q31_t x)
1032   {
1033 
1034     return ((((x << 24) >> 24) & 0x0000FFFF) |
1035             (((x << 8) >> 8) & 0xFFFF0000));
1036   }
1037 
1038 
1039 #endif /* defined (ARM_MATH_CM3) || defined (ARM_MATH_CM0_FAMILY) */
1040 
1041 
1042   /**
1043    * @brief Instance structure for the Q7 FIR filter.
1044    */
1045   typedef struct
1046   {
1047     uint16_t numTaps;        /**< number of filter coefficients in the filter. */
1048     q7_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1049     q7_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1050   } arm_fir_instance_q7;
1051 
1052   /**
1053    * @brief Instance structure for the Q15 FIR filter.
1054    */
1055   typedef struct
1056   {
1057     uint16_t numTaps;         /**< number of filter coefficients in the filter. */
1058     q15_t *pState;            /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
1059     q15_t *pCoeffs;           /**< points to the coefficient array. The array is of length numTaps.*/
1060   } arm_fir_instance_q15;
1061 
1062   /**
1063    * @brief Instance structure for the Q31 FIR filter; passed to arm_fir_init_q31(), arm_fir_q31() and arm_fir_fast_q31().
1064    */
1065   typedef struct
1066   {
1067     uint16_t numTaps;         /**< Number of filter coefficients in the filter. */
1068     q31_t *pState;            /**< Points to the state variable array. The array is of length numTaps+blockSize-1. */
1069     q31_t *pCoeffs;           /**< Points to the coefficient array. The array is of length numTaps. */
1070   } arm_fir_instance_q31;
1071 
1072   /**
1073    * @brief Instance structure for the floating-point FIR filter; passed to arm_fir_init_f32() and arm_fir_f32().
1074    */
1075   typedef struct
1076   {
1077     uint16_t numTaps;     /**< Number of filter coefficients in the filter. */
1078     float32_t *pState;    /**< Points to the state variable array. The array is of length numTaps+blockSize-1. */
1079     float32_t *pCoeffs;   /**< Points to the coefficient array. The array is of length numTaps. */
1080   } arm_fir_instance_f32;
1081 
1082 
1083   /**
1084    * @brief Processing function for the Q7 FIR filter.
1085    * @param[in] *S points to an instance of the Q7 FIR filter structure.
1086    * @param[in] *pSrc points to the block of input data.
1087    * @param[out] *pDst points to the block of output data.
1088    * @param[in] blockSize number of samples to process.
1089    * @return none.
1090    */
1091   void arm_fir_q7(
1092   const arm_fir_instance_q7 * S,
1093   q7_t * pSrc,
1094   q7_t * pDst,
1095   uint32_t blockSize);
1096 
1097 
1098   /**
1099    * @brief  Initialization function for the Q7 FIR filter.
1100    * @param[in,out] *S points to an instance of the Q7 FIR structure.
1101    * @param[in] numTaps  Number of filter coefficients in the filter.
1102    * @param[in] *pCoeffs points to the filter coefficients.
1103    * @param[in] *pState points to the state buffer.
1104    * @param[in] blockSize number of samples that are processed.
1105    * @return none
1106    */
1107   void arm_fir_init_q7(
1108   arm_fir_instance_q7 * S,
1109   uint16_t numTaps,
1110   q7_t * pCoeffs,
1111   q7_t * pState,
1112   uint32_t blockSize);
1113 
1114 
1115   /**
1116    * @brief Processing function for the Q15 FIR filter.
1117    * @param[in] *S points to an instance of the Q15 FIR structure.
1118    * @param[in] *pSrc points to the block of input data.
1119    * @param[out] *pDst points to the block of output data.
1120    * @param[in] blockSize number of samples to process.
1121    * @return none.
1122    */
1123   void arm_fir_q15(
1124   const arm_fir_instance_q15 * S,
1125   q15_t * pSrc,
1126   q15_t * pDst,
1127   uint32_t blockSize);
1128 
1129   /**
1130    * @brief Processing function for the fast Q15 FIR filter for Cortex-M3 and Cortex-M4.
1131    * @param[in] *S points to an instance of the Q15 FIR filter structure.
1132    * @param[in] *pSrc points to the block of input data.
1133    * @param[out] *pDst points to the block of output data.
1134    * @param[in] blockSize number of samples to process.
1135    * @return none.
1136    */
1137   void arm_fir_fast_q15(
1138   const arm_fir_instance_q15 * S,
1139   q15_t * pSrc,
1140   q15_t * pDst,
1141   uint32_t blockSize);
1142 
1143   /**
1144    * @brief  Initialization function for the Q15 FIR filter.
1145    * @param[in,out] *S points to an instance of the Q15 FIR filter structure.
1146    * @param[in] numTaps  Number of filter coefficients in the filter. Must be even and greater than or equal to 4.
1147    * @param[in] *pCoeffs points to the filter coefficients.
1148    * @param[in] *pState points to the state buffer.
1149    * @param[in] blockSize number of samples that are processed at a time.
1150    * @return The function returns ARM_MATH_SUCCESS if initialization was successful or ARM_MATH_ARGUMENT_ERROR if
1151    * <code>numTaps</code> is not a supported value.
1152    */
1153 
1154   arm_status arm_fir_init_q15(
1155   arm_fir_instance_q15 * S,
1156   uint16_t numTaps,
1157   q15_t * pCoeffs,
1158   q15_t * pState,
1159   uint32_t blockSize);
1160 
1161   /**
1162    * @brief Processing function for the Q31 FIR filter.
1163    * @param[in] *S points to an instance of the Q31 FIR filter structure.
1164    * @param[in] *pSrc points to the block of input data.
1165    * @param[out] *pDst points to the block of output data.
1166    * @param[in] blockSize number of samples to process.
1167    * @return none.
1168    */
1169   void arm_fir_q31(
1170   const arm_fir_instance_q31 * S,
1171   q31_t * pSrc,
1172   q31_t * pDst,
1173   uint32_t blockSize);
1174 
1175   /**
1176    * @brief Processing function for the fast Q31 FIR filter for Cortex-M3 and Cortex-M4.
1177    * @param[in] *S points to an instance of the Q31 FIR structure.
1178    * @param[in] *pSrc points to the block of input data.
1179    * @param[out] *pDst points to the block of output data.
1180    * @param[in] blockSize number of samples to process.
1181    * @return none.
1182    */
1183   void arm_fir_fast_q31(
1184   const arm_fir_instance_q31 * S,
1185   q31_t * pSrc,
1186   q31_t * pDst,
1187   uint32_t blockSize);
1188 
1189   /**
1190    * @brief  Initialization function for the Q31 FIR filter.
1191    * @param[in,out] *S points to an instance of the Q31 FIR structure.
1192    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1193    * @param[in] 	*pCoeffs points to the filter coefficients.
1194    * @param[in] 	*pState points to the state buffer.
1195    * @param[in] 	blockSize number of samples that are processed at a time.
1196    * @return 		none.
1197    */
1198   void arm_fir_init_q31(
1199   arm_fir_instance_q31 * S,
1200   uint16_t numTaps,
1201   q31_t * pCoeffs,
1202   q31_t * pState,
1203   uint32_t blockSize);
1204 
1205   /**
1206    * @brief Processing function for the floating-point FIR filter.
1207    * @param[in] *S points to an instance of the floating-point FIR structure.
1208    * @param[in] *pSrc points to the block of input data.
1209    * @param[out] *pDst points to the block of output data.
1210    * @param[in] blockSize number of samples to process.
1211    * @return none.
1212    */
1213   void arm_fir_f32(
1214   const arm_fir_instance_f32 * S,
1215   float32_t * pSrc,
1216   float32_t * pDst,
1217   uint32_t blockSize);
1218 
1219   /**
1220    * @brief  Initialization function for the floating-point FIR filter.
1221    * @param[in,out] *S points to an instance of the floating-point FIR filter structure.
1222    * @param[in] 	numTaps  Number of filter coefficients in the filter.
1223    * @param[in] 	*pCoeffs points to the filter coefficients.
1224    * @param[in] 	*pState points to the state buffer.
1225    * @param[in] 	blockSize number of samples that are processed at a time.
1226    * @return    	none.
1227    */
1228   void arm_fir_init_f32(
1229   arm_fir_instance_f32 * S,
1230   uint16_t numTaps,
1231   float32_t * pCoeffs,
1232   float32_t * pState,
1233   uint32_t blockSize);
1234 
1235 
1236   /**
1237    * @brief Instance structure for the Q15 Biquad cascade filter; passed to arm_biquad_cascade_df1_init_q15() and the Q15 df1 processing functions.
1238    */
1239   typedef struct
1240   {
1241     int8_t numStages;         /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. NOTE(review): signed here, but arm_biquad_cascade_df1_init_q15() takes uint8_t numStages. */
1242     q15_t *pState;            /**< Points to the array of state variables.  The array is of length 4*numStages. */
1243     q15_t *pCoeffs;           /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1244     int8_t postShift;         /**< Additional shift, in bits, applied to each output sample. */
1245 
1246   } arm_biquad_casd_df1_inst_q15;
1247 
1248 
1249   /**
1250    * @brief Instance structure for the Q31 Biquad cascade filter; passed to arm_biquad_cascade_df1_init_q31() and the Q31 df1 processing functions.
1251    */
1252   typedef struct
1253   {
1254     uint32_t numStages;      /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1255     q31_t *pState;           /**< Points to the array of state variables.  The array is of length 4*numStages. */
1256     q31_t *pCoeffs;          /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1257     uint8_t postShift;       /**< Additional shift, in bits, applied to each output sample. NOTE(review): unsigned here, but arm_biquad_cascade_df1_init_q31() takes int8_t postShift. */
1258 
1259   } arm_biquad_casd_df1_inst_q31;
1260 
1261   /**
1262    * @brief Instance structure for the floating-point Biquad cascade filter; passed to arm_biquad_cascade_df1_init_f32() and arm_biquad_cascade_df1_f32().
1263    */
1264   typedef struct
1265   {
1266     uint32_t numStages;         /**< Number of 2nd order stages in the filter.  Overall order is 2*numStages. */
1267     float32_t *pState;          /**< Points to the array of state variables.  The array is of length 4*numStages. */
1268     float32_t *pCoeffs;         /**< Points to the array of coefficients.  The array is of length 5*numStages. */
1269 
1270 
1271   } arm_biquad_casd_df1_inst_f32;
1272 
1273 
1274 
1275   /**
1276    * @brief Processing function for the Q15 Biquad cascade filter.
1277    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1278    * @param[in]  *pSrc points to the block of input data.
1279    * @param[out] *pDst points to the block of output data.
1280    * @param[in]  blockSize number of samples to process.
1281    * @return     none.
1282    */
1283 
1284   void arm_biquad_cascade_df1_q15(
1285   const arm_biquad_casd_df1_inst_q15 * S,
1286   q15_t * pSrc,
1287   q15_t * pDst,
1288   uint32_t blockSize);
1289 
1290   /**
1291    * @brief  Initialization function for the Q15 Biquad cascade filter.
1292    * @param[in,out] *S           points to an instance of the Q15 Biquad cascade structure.
1293    * @param[in]     numStages    number of 2nd order stages in the filter.
1294    * @param[in]     *pCoeffs     points to the filter coefficients.
1295    * @param[in]     *pState      points to the state buffer.
1296    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1297    * @return        none
1298    */
1299 
1300   void arm_biquad_cascade_df1_init_q15(
1301   arm_biquad_casd_df1_inst_q15 * S,
1302   uint8_t numStages,
1303   q15_t * pCoeffs,
1304   q15_t * pState,
1305   int8_t postShift);
1306 
1307 
1308   /**
1309    * @brief Fast but less precise processing function for the Q15 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1310    * @param[in]  *S points to an instance of the Q15 Biquad cascade structure.
1311    * @param[in]  *pSrc points to the block of input data.
1312    * @param[out] *pDst points to the block of output data.
1313    * @param[in]  blockSize number of samples to process.
1314    * @return     none.
1315    */
1316 
1317   void arm_biquad_cascade_df1_fast_q15(
1318   const arm_biquad_casd_df1_inst_q15 * S,
1319   q15_t * pSrc,
1320   q15_t * pDst,
1321   uint32_t blockSize);
1322 
1323 
1324   /**
1325    * @brief Processing function for the Q31 Biquad cascade filter
1326    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1327    * @param[in]  *pSrc      points to the block of input data.
1328    * @param[out] *pDst      points to the block of output data.
1329    * @param[in]  blockSize  number of samples to process.
1330    * @return     none.
1331    */
1332 
1333   void arm_biquad_cascade_df1_q31(
1334   const arm_biquad_casd_df1_inst_q31 * S,
1335   q31_t * pSrc,
1336   q31_t * pDst,
1337   uint32_t blockSize);
1338 
1339   /**
1340    * @brief Fast but less precise processing function for the Q31 Biquad cascade filter for Cortex-M3 and Cortex-M4.
1341    * @param[in]  *S         points to an instance of the Q31 Biquad cascade structure.
1342    * @param[in]  *pSrc      points to the block of input data.
1343    * @param[out] *pDst      points to the block of output data.
1344    * @param[in]  blockSize  number of samples to process.
1345    * @return     none.
1346    */
1347 
1348   void arm_biquad_cascade_df1_fast_q31(
1349   const arm_biquad_casd_df1_inst_q31 * S,
1350   q31_t * pSrc,
1351   q31_t * pDst,
1352   uint32_t blockSize);
1353 
1354   /**
1355    * @brief  Initialization function for the Q31 Biquad cascade filter.
1356    * @param[in,out] *S           points to an instance of the Q31 Biquad cascade structure.
1357    * @param[in]     numStages      number of 2nd order stages in the filter.
1358    * @param[in]     *pCoeffs     points to the filter coefficients.
1359    * @param[in]     *pState      points to the state buffer.
1360    * @param[in]     postShift    Shift to be applied to the output. Varies according to the coefficients format
1361    * @return        none
1362    */
1363 
1364   void arm_biquad_cascade_df1_init_q31(
1365   arm_biquad_casd_df1_inst_q31 * S,
1366   uint8_t numStages,
1367   q31_t * pCoeffs,
1368   q31_t * pState,
1369   int8_t postShift);
1370 
1371   /**
1372    * @brief Processing function for the floating-point Biquad cascade filter.
1373    * @param[in]  *S         points to an instance of the floating-point Biquad cascade structure.
1374    * @param[in]  *pSrc      points to the block of input data.
1375    * @param[out] *pDst      points to the block of output data.
1376    * @param[in]  blockSize  number of samples to process.
1377    * @return     none.
1378    */
1379 
1380   void arm_biquad_cascade_df1_f32(
1381   const arm_biquad_casd_df1_inst_f32 * S,
1382   float32_t * pSrc,
1383   float32_t * pDst,
1384   uint32_t blockSize);
1385 
1386   /**
1387    * @brief  Initialization function for the floating-point Biquad cascade filter.
1388    * @param[in,out] *S           points to an instance of the floating-point Biquad cascade structure.
1389    * @param[in]     numStages    number of 2nd order stages in the filter.
1390    * @param[in]     *pCoeffs     points to the filter coefficients.
1391    * @param[in]     *pState      points to the state buffer.
1392    * @return        none
1393    */
1394 
1395   void arm_biquad_cascade_df1_init_f32(
1396   arm_biquad_casd_df1_inst_f32 * S,
1397   uint8_t numStages,
1398   float32_t * pCoeffs,
1399   float32_t * pState);
1400 
1401 
1402   /**
1403    * @brief Instance structure for the floating-point matrix structure; set up via arm_mat_init_f32() and taken by the arm_mat_*_f32() functions.
1404    */
1405 
1406   typedef struct
1407   {
1408     uint16_t numRows;     /**< Number of rows of the matrix.     */
1409     uint16_t numCols;     /**< Number of columns of the matrix.  */
1410     float32_t *pData;     /**< Points to the data of the matrix. */
1411   } arm_matrix_instance_f32;
1412 
1413   /**
1414    * @brief Instance structure for the Q15 matrix structure; set up via arm_mat_init_q15() and taken by the arm_mat_*_q15() functions.
1415    */
1416 
1417   typedef struct
1418   {
1419     uint16_t numRows;     /**< Number of rows of the matrix.     */
1420     uint16_t numCols;     /**< Number of columns of the matrix.  */
1421     q15_t *pData;         /**< Points to the data of the matrix. */
1422 
1423   } arm_matrix_instance_q15;
1424 
1425   /**
1426    * @brief Instance structure for the Q31 matrix structure; set up via arm_mat_init_q31() and taken by the arm_mat_*_q31() functions.
1427    */
1428 
1429   typedef struct
1430   {
1431     uint16_t numRows;     /**< Number of rows of the matrix.     */
1432     uint16_t numCols;     /**< Number of columns of the matrix.  */
1433     q31_t *pData;         /**< Points to the data of the matrix. */
1434 
1435   } arm_matrix_instance_q31;
1436 
1437 
1438 
1439   /**
1440    * @brief Floating-point matrix addition.
1441    * @param[in]       *pSrcA points to the first input matrix structure
1442    * @param[in]       *pSrcB points to the second input matrix structure
1443    * @param[out]      *pDst points to output matrix structure
1444    * @return     The function returns either
1445    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1446    */
1447 
1448   arm_status arm_mat_add_f32(
1449   const arm_matrix_instance_f32 * pSrcA,
1450   const arm_matrix_instance_f32 * pSrcB,
1451   arm_matrix_instance_f32 * pDst);
1452 
1453   /**
1454    * @brief Q15 matrix addition.
1455    * @param[in]       *pSrcA points to the first input matrix structure
1456    * @param[in]       *pSrcB points to the second input matrix structure
1457    * @param[out]      *pDst points to output matrix structure
1458    * @return     The function returns either
1459    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1460    */
1461 
1462   arm_status arm_mat_add_q15(
1463   const arm_matrix_instance_q15 * pSrcA,
1464   const arm_matrix_instance_q15 * pSrcB,
1465   arm_matrix_instance_q15 * pDst);
1466 
1467   /**
1468    * @brief Q31 matrix addition.
1469    * @param[in]       *pSrcA points to the first input matrix structure
1470    * @param[in]       *pSrcB points to the second input matrix structure
1471    * @param[out]      *pDst points to output matrix structure
1472    * @return     The function returns either
1473    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1474    */
1475 
1476   arm_status arm_mat_add_q31(
1477   const arm_matrix_instance_q31 * pSrcA,
1478   const arm_matrix_instance_q31 * pSrcB,
1479   arm_matrix_instance_q31 * pDst);
1480 
1481 
1482   /**
1483    * @brief Floating-point matrix transpose.
1484    * @param[in]  *pSrc points to the input matrix
1485    * @param[out] *pDst points to the output matrix
1486    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1487    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1488    */
1489 
1490   arm_status arm_mat_trans_f32(
1491   const arm_matrix_instance_f32 * pSrc,
1492   arm_matrix_instance_f32 * pDst);
1493 
1494 
1495   /**
1496    * @brief Q15 matrix transpose.
1497    * @param[in]  *pSrc points to the input matrix
1498    * @param[out] *pDst points to the output matrix
1499    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1500    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1501    */
1502 
1503   arm_status arm_mat_trans_q15(
1504   const arm_matrix_instance_q15 * pSrc,
1505   arm_matrix_instance_q15 * pDst);
1506 
1507   /**
1508    * @brief Q31 matrix transpose.
1509    * @param[in]  *pSrc points to the input matrix
1510    * @param[out] *pDst points to the output matrix
1511    * @return 	The function returns either  <code>ARM_MATH_SIZE_MISMATCH</code>
1512    * or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1513    */
1514 
1515   arm_status arm_mat_trans_q31(
1516   const arm_matrix_instance_q31 * pSrc,
1517   arm_matrix_instance_q31 * pDst);
1518 
1519 
1520   /**
1521    * @brief Floating-point matrix multiplication
1522    * @param[in]       *pSrcA points to the first input matrix structure
1523    * @param[in]       *pSrcB points to the second input matrix structure
1524    * @param[out]      *pDst points to output matrix structure
1525    * @return     The function returns either
1526    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1527    */
1528 
1529   arm_status arm_mat_mult_f32(
1530   const arm_matrix_instance_f32 * pSrcA,
1531   const arm_matrix_instance_f32 * pSrcB,
1532   arm_matrix_instance_f32 * pDst);
1533 
1534   /**
1535    * @brief Q15 matrix multiplication
1536    * @param[in]       *pSrcA points to the first input matrix structure
1537    * @param[in]       *pSrcB points to the second input matrix structure
1538    * @param[out]      *pDst points to output matrix structure
1539    * @param[in]		  *pState points to the array for storing intermediate results
1540    * @return     The function returns either
1541    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1542    */
1543 
1544   arm_status arm_mat_mult_q15(
1545   const arm_matrix_instance_q15 * pSrcA,
1546   const arm_matrix_instance_q15 * pSrcB,
1547   arm_matrix_instance_q15 * pDst,
1548   q15_t * pState);
1549 
1550   /**
1551    * @brief Q15 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1552    * @param[in]       *pSrcA  points to the first input matrix structure
1553    * @param[in]       *pSrcB  points to the second input matrix structure
1554    * @param[out]      *pDst   points to output matrix structure
1555    * @param[in]		  *pState points to the array for storing intermediate results
1556    * @return     The function returns either
1557    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1558    */
1559 
1560   arm_status arm_mat_mult_fast_q15(
1561   const arm_matrix_instance_q15 * pSrcA,
1562   const arm_matrix_instance_q15 * pSrcB,
1563   arm_matrix_instance_q15 * pDst,
1564   q15_t * pState);
1565 
1566   /**
1567    * @brief Q31 matrix multiplication
1568    * @param[in]       *pSrcA points to the first input matrix structure
1569    * @param[in]       *pSrcB points to the second input matrix structure
1570    * @param[out]      *pDst points to output matrix structure
1571    * @return     The function returns either
1572    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1573    */
1574 
1575   arm_status arm_mat_mult_q31(
1576   const arm_matrix_instance_q31 * pSrcA,
1577   const arm_matrix_instance_q31 * pSrcB,
1578   arm_matrix_instance_q31 * pDst);
1579 
1580   /**
1581    * @brief Q31 matrix multiplication (fast variant) for Cortex-M3 and Cortex-M4
1582    * @param[in]       *pSrcA points to the first input matrix structure
1583    * @param[in]       *pSrcB points to the second input matrix structure
1584    * @param[out]      *pDst points to output matrix structure
1585    * @return     The function returns either
1586    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1587    */
1588 
1589   arm_status arm_mat_mult_fast_q31(
1590   const arm_matrix_instance_q31 * pSrcA,
1591   const arm_matrix_instance_q31 * pSrcB,
1592   arm_matrix_instance_q31 * pDst);
1593 
1594 
1595   /**
1596    * @brief Floating-point matrix subtraction
1597    * @param[in]       *pSrcA points to the first input matrix structure
1598    * @param[in]       *pSrcB points to the second input matrix structure
1599    * @param[out]      *pDst points to output matrix structure
1600    * @return     The function returns either
1601    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1602    */
1603 
1604   arm_status arm_mat_sub_f32(
1605   const arm_matrix_instance_f32 * pSrcA,
1606   const arm_matrix_instance_f32 * pSrcB,
1607   arm_matrix_instance_f32 * pDst);
1608 
1609   /**
1610    * @brief Q15 matrix subtraction
1611    * @param[in]       *pSrcA points to the first input matrix structure
1612    * @param[in]       *pSrcB points to the second input matrix structure
1613    * @param[out]      *pDst points to output matrix structure
1614    * @return     The function returns either
1615    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1616    */
1617 
1618   arm_status arm_mat_sub_q15(
1619   const arm_matrix_instance_q15 * pSrcA,
1620   const arm_matrix_instance_q15 * pSrcB,
1621   arm_matrix_instance_q15 * pDst);
1622 
1623   /**
1624    * @brief Q31 matrix subtraction
1625    * @param[in]       *pSrcA points to the first input matrix structure
1626    * @param[in]       *pSrcB points to the second input matrix structure
1627    * @param[out]      *pDst points to output matrix structure
1628    * @return     The function returns either
1629    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1630    */
1631 
1632   arm_status arm_mat_sub_q31(
1633   const arm_matrix_instance_q31 * pSrcA,
1634   const arm_matrix_instance_q31 * pSrcB,
1635   arm_matrix_instance_q31 * pDst);
1636 
1637   /**
1638    * @brief Floating-point matrix scaling.
1639    * @param[in]  *pSrc points to the input matrix
1640    * @param[in]  scale scale factor
1641    * @param[out] *pDst points to the output matrix
1642    * @return     The function returns either
1643    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1644    */
1645 
1646   arm_status arm_mat_scale_f32(
1647   const arm_matrix_instance_f32 * pSrc,
1648   float32_t scale,
1649   arm_matrix_instance_f32 * pDst);
1650 
1651   /**
1652    * @brief Q15 matrix scaling.
1653    * @param[in]       *pSrc points to input matrix
1654    * @param[in]       scaleFract fractional portion of the scale factor
1655    * @param[in]       shift number of bits to shift the result by
1656    * @param[out]      *pDst points to output matrix
1657    * @return     The function returns either
1658    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1659    */
1660 
1661   arm_status arm_mat_scale_q15(
1662   const arm_matrix_instance_q15 * pSrc,
1663   q15_t scaleFract,
1664   int32_t shift,
1665   arm_matrix_instance_q15 * pDst);
1666 
1667   /**
1668    * @brief Q31 matrix scaling.
1669    * @param[in]       *pSrc points to input matrix
1670    * @param[in]       scaleFract fractional portion of the scale factor
1671    * @param[in]       shift number of bits to shift the result by
1672    * @param[out]      *pDst points to output matrix structure
1673    * @return     The function returns either
1674    * <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
1675    */
1676 
1677   arm_status arm_mat_scale_q31(
1678   const arm_matrix_instance_q31 * pSrc,
1679   q31_t scaleFract,
1680   int32_t shift,
1681   arm_matrix_instance_q31 * pDst);
1682 
1683 
1684   /**
1685    * @brief  Q31 matrix initialization.
1686    * @param[in,out] *S             points to an instance of the Q31 matrix structure.
1687    * @param[in]     nRows          number of rows in the matrix.
1688    * @param[in]     nColumns       number of columns in the matrix.
1689    * @param[in]     *pData	       points to the matrix data array.
1690    * @return        none
1691    */
1692 
1693   void arm_mat_init_q31(
1694   arm_matrix_instance_q31 * S,
1695   uint16_t nRows,
1696   uint16_t nColumns,
1697   q31_t * pData);
1698 
1699   /**
1700    * @brief  Q15 matrix initialization.
1701    * @param[in,out] *S             points to an instance of the Q15 matrix structure.
1702    * @param[in]     nRows          number of rows in the matrix.
1703    * @param[in]     nColumns       number of columns in the matrix.
1704    * @param[in]     *pData	       points to the matrix data array.
1705    * @return        none
1706    */
1707 
1708   void arm_mat_init_q15(
1709   arm_matrix_instance_q15 * S,
1710   uint16_t nRows,
1711   uint16_t nColumns,
1712   q15_t * pData);
1713 
1714   /**
1715    * @brief  Floating-point matrix initialization.
1716    * @param[in,out] *S             points to an instance of the floating-point matrix structure.
1717    * @param[in]     nRows          number of rows in the matrix.
1718    * @param[in]     nColumns       number of columns in the matrix.
1719    * @param[in]     *pData	       points to the matrix data array.
1720    * @return        none
1721    */
1722 
1723   void arm_mat_init_f32(
1724   arm_matrix_instance_f32 * S,
1725   uint16_t nRows,
1726   uint16_t nColumns,
1727   float32_t * pData);
1728 
1729 
1730 
1731   /**
1732    * @brief Instance structure for the Q15 PID Control; the layout of the derived gains depends on ARM_MATH_CM0_FAMILY.
1733    */
1734   typedef struct
1735   {
1736     q15_t A0;    /**< The derived gain, A0 = Kp + Ki + Kd . */
1737 #ifdef ARM_MATH_CM0_FAMILY
1738     q15_t A1;    /**< Derived gain; presumably A1 = -Kp - 2Kd as in the Q31 instance - TODO confirm against arm_pid_init_q15(). */
1739     q15_t A2;    /**< Derived gain; presumably A2 = Kd as in the Q31 instance - TODO confirm against arm_pid_init_q15(). */
1740 #else
1741     q31_t A1;           /**< The derived gain A1 = -Kp - 2Kd | Kd. NOTE(review): presumably both 16-bit gains are packed into this one 32-bit word; halfword order is not shown here - confirm. */
1742 #endif
1743     q15_t state[3];       /**< The state array of length 3. */
1744     q15_t Kp;           /**< The proportional gain. */
1745     q15_t Ki;           /**< The integral gain. */
1746     q15_t Kd;           /**< The derivative gain. */
1747   } arm_pid_instance_q15;
1748 
1749   /**
1750    * @brief Instance structure for the Q31 PID Control; passed to arm_pid_init_q31() and arm_pid_reset_q31().
1751    */
1752   typedef struct
1753   {
1754     q31_t A0;            /**< The derived gain, A0 = Kp + Ki + Kd. */
1755     q31_t A1;            /**< The derived gain, A1 = -Kp - 2Kd. */
1756     q31_t A2;            /**< The derived gain, A2 = Kd. */
1757     q31_t state[3];      /**< The state array of length 3. */
1758     q31_t Kp;            /**< The proportional gain. */
1759     q31_t Ki;            /**< The integral gain. */
1760     q31_t Kd;            /**< The derivative gain. */
1761 
1762   } arm_pid_instance_q31;
1763 
1764   /**
1765    * @brief Instance structure for the floating-point PID Control; passed to arm_pid_init_f32() and arm_pid_reset_f32().
1766    */
1767   typedef struct
1768   {
1769     float32_t A0;          /**< The derived gain, A0 = Kp + Ki + Kd. */
1770     float32_t A1;          /**< The derived gain, A1 = -Kp - 2Kd. */
1771     float32_t A2;          /**< The derived gain, A2 = Kd. */
1772     float32_t state[3];    /**< The state array of length 3. */
1773     float32_t Kp;               /**< The proportional gain. */
1774     float32_t Ki;               /**< The integral gain. */
1775     float32_t Kd;               /**< The derivative gain. */
1776   } arm_pid_instance_f32;
1777 
1778 
1779 
1780   /**
1781    * @brief  Initialization function for the floating-point PID Control.
1782    * @param[in,out] *S      points to an instance of the PID structure.
1783    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1784    * @return none.
1785    */
1786   void arm_pid_init_f32(
1787   arm_pid_instance_f32 * S,
1788   int32_t resetStateFlag);
1789 
1790   /**
1791    * @brief  Reset function for the floating-point PID Control.
1792    * @param[in,out] *S is an instance of the floating-point PID Control structure
1793    * @return none
1794    */
1795   void arm_pid_reset_f32(
1796   arm_pid_instance_f32 * S);
1797 
1798 
1799   /**
1800    * @brief  Initialization function for the Q31 PID Control.
1801    * @param[in,out] *S points to an instance of the Q31 PID structure.
1802    * @param[in]     resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1803    * @return none.
1804    */
1805   void arm_pid_init_q31(
1806   arm_pid_instance_q31 * S,
1807   int32_t resetStateFlag);
1808 
1809 
1810   /**
1811    * @brief  Reset function for the Q31 PID Control.
1812    * @param[in,out] *S points to an instance of the Q31 PID Control structure
1813    * @return none
1814    */
1815 
1816   void arm_pid_reset_q31(
1817   arm_pid_instance_q31 * S);
1818 
1819   /**
1820    * @brief  Initialization function for the Q15 PID Control.
1821    * @param[in,out] *S points to an instance of the Q15 PID structure.
1822    * @param[in] resetStateFlag  flag to reset the state. 0 = no change in state 1 = reset the state.
1823    * @return none.
1824    */
1825   void arm_pid_init_q15(
1826   arm_pid_instance_q15 * S,
1827   int32_t resetStateFlag);
1828 
1829   /**
1830    * @brief  Reset function for the Q15 PID Control.
1831    * @param[in,out] *S points to an instance of the Q15 PID Control structure
1832    * @return none
1833    */
1834   void arm_pid_reset_q15(
1835   arm_pid_instance_q15 * S);
1836 
1837 
1838   /**
1839    * @brief Instance structure for the floating-point Linear Interpolate function.
1840    */
1841   typedef struct
1842   {
1843     uint32_t nValues;           /**< nValues; presumably the number of entries in the pYData table - TODO confirm. */
1844     float32_t x1;               /**< x1; presumably the X value of the first table entry - TODO confirm. */
1845     float32_t xSpacing;         /**< xSpacing; presumably the X distance between consecutive table entries - TODO confirm. */
1846     float32_t *pYData;          /**< Pointer to the table of Y values. */
1847   } arm_linear_interp_instance_f32;
1848 
1849   /**
1850    * @brief Instance structure for the floating-point bilinear interpolation function (float32_t data table).
1851    */
1852 
1853   typedef struct
1854   {
1855     uint16_t numRows;   /**< Number of rows in the data table. */
1856     uint16_t numCols;   /**< Number of columns in the data table. */
1857     float32_t *pData;   /**< Points to the data table. */
1858   } arm_bilinear_interp_instance_f32;
1859 
1860    /**
1861    * @brief Instance structure for the Q31 bilinear interpolation function (q31_t data table).
1862    */
1863 
1864   typedef struct
1865   {
1866     uint16_t numRows;   /**< Number of rows in the data table. */
1867     uint16_t numCols;   /**< Number of columns in the data table. */
1868     q31_t *pData;       /**< Points to the data table. */
1869   } arm_bilinear_interp_instance_q31;
1870 
1871    /**
1872    * @brief Instance structure for the Q15 bilinear interpolation function.
1873    */
1874 
1875   typedef struct
1876   {
1877     uint16_t numRows;   /**< number of rows in the data table. */
1878     uint16_t numCols;   /**< number of columns in the data table. */
1879     q15_t *pData;       /**< points to the data table (q15_t values). */
1880   } arm_bilinear_interp_instance_q15;
1881 
1882    /**
1883    * @brief Instance structure for the Q7 bilinear interpolation function.
1884    */
1885 
1886   typedef struct
1887   {
1888     uint16_t numRows;   /**< number of rows in the data table. */
1889     uint16_t numCols;   /**< number of columns in the data table. */
1890     q7_t *pData;                /**< points to the data table (q7_t values). */
1891   } arm_bilinear_interp_instance_q7;
1892 
1893 
1894   /**
1895    * @brief Q7 vector multiplication.
1896    * @param[in]       pSrcA      points to the first input vector.
1897    * @param[in]       pSrcB      points to the second input vector.
1898    * @param[out]      pDst       points to the output vector.
1899    * @param[in]       blockSize  number of samples in each vector.
1900    * @return none.
1901    */
1902 
1903   void arm_mult_q7(
1904   q7_t * pSrcA,
1905   q7_t * pSrcB,
1906   q7_t * pDst,
1907   uint32_t blockSize);
1908 
1909   /**
1910    * @brief Q15 vector multiplication.
1911    * @param[in]       pSrcA      points to the first input vector.
1912    * @param[in]       pSrcB      points to the second input vector.
1913    * @param[out]      pDst       points to the output vector.
1914    * @param[in]       blockSize  number of samples in each vector.
1915    * @return none.
1916    */
1917 
1918   void arm_mult_q15(
1919   q15_t * pSrcA,
1920   q15_t * pSrcB,
1921   q15_t * pDst,
1922   uint32_t blockSize);
1923 
1924   /**
1925    * @brief Q31 vector multiplication.
1926    * @param[in]       pSrcA      points to the first input vector.
1927    * @param[in]       pSrcB      points to the second input vector.
1928    * @param[out]      pDst       points to the output vector.
1929    * @param[in]       blockSize  number of samples in each vector.
1930    * @return none.
1931    */
1932 
1933   void arm_mult_q31(
1934   q31_t * pSrcA,
1935   q31_t * pSrcB,
1936   q31_t * pDst,
1937   uint32_t blockSize);
1938 
1939   /**
1940    * @brief Floating-point vector multiplication.
1941    * @param[in]       pSrcA      points to the first input vector.
1942    * @param[in]       pSrcB      points to the second input vector.
1943    * @param[out]      pDst       points to the output vector.
1944    * @param[in]       blockSize  number of samples in each vector.
1945    * @return none.
1946    */
1947 
1948   void arm_mult_f32(
1949   float32_t * pSrcA,
1950   float32_t * pSrcB,
1951   float32_t * pDst,
1952   uint32_t blockSize);
1953 
1954 
1955 
1956 
1957 
1958 
1959   /**
1960    * @brief Instance structure for the Radix-2 Q15 CFFT/CIFFT function.
1961    */
1962 
1963   typedef struct
1964   {
1965     uint16_t fftLen;                 /**< length of the FFT. */
1966     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1967     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1968     q15_t *pTwiddle;                     /**< points to the Sin twiddle factor table. */
1969     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1970     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1971     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1972   } arm_cfft_radix2_instance_q15;
1973   /* Init routine (going by its name) for the structure above; fftLen, ifftFlag and bitReverseFlag share names with its fields. Returns an arm_status. */
1974   arm_status arm_cfft_radix2_init_q15(
1975   arm_cfft_radix2_instance_q15 * S,
1976   uint16_t fftLen,
1977   uint8_t ifftFlag,
1978   uint8_t bitReverseFlag);
1979   /* Radix-2 Q15 CFFT/CIFFT processing function: S is const, pSrc is a writable buffer (presumably transformed in place; TODO confirm). */
1980   void arm_cfft_radix2_q15(
1981   const arm_cfft_radix2_instance_q15 * S,
1982   q15_t * pSrc);
1983 
1984 
1985 
1986   /**
1987    * @brief Instance structure for the Radix-4 Q15 CFFT/CIFFT function.
1988    */
1989 
1990   typedef struct
1991   {
1992     uint16_t fftLen;                 /**< length of the FFT. */
1993     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
1994     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
1995     q15_t *pTwiddle;                 /**< points to the twiddle factor table. */
1996     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
1997     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
1998     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
1999   } arm_cfft_radix4_instance_q15;
2000   /* Init routine (going by its name) for the structure above; fftLen, ifftFlag and bitReverseFlag share names with its fields. Returns an arm_status. */
2001   arm_status arm_cfft_radix4_init_q15(
2002   arm_cfft_radix4_instance_q15 * S,
2003   uint16_t fftLen,
2004   uint8_t ifftFlag,
2005   uint8_t bitReverseFlag);
2006   /* Radix-4 Q15 CFFT/CIFFT processing function: S is const, pSrc is a writable buffer (presumably transformed in place; TODO confirm). */
2007   void arm_cfft_radix4_q15(
2008   const arm_cfft_radix4_instance_q15 * S,
2009   q15_t * pSrc);
2010 
2011   /**
2012    * @brief Instance structure for the Radix-2 Q31 CFFT/CIFFT function.
2013    */
2014 
2015   typedef struct
2016   {
2017     uint16_t fftLen;                 /**< length of the FFT. */
2018     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2019     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2020     q31_t *pTwiddle;                     /**< points to the twiddle factor table. */
2021     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2022     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2023     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2024   } arm_cfft_radix2_instance_q31;
2025   /* Init routine (going by its name) for the structure above; fftLen, ifftFlag and bitReverseFlag share names with its fields. Returns an arm_status. */
2026   arm_status arm_cfft_radix2_init_q31(
2027   arm_cfft_radix2_instance_q31 * S,
2028   uint16_t fftLen,
2029   uint8_t ifftFlag,
2030   uint8_t bitReverseFlag);
2031   /* Radix-2 Q31 CFFT/CIFFT processing function: S is const, pSrc is a writable buffer (presumably transformed in place; TODO confirm). */
2032   void arm_cfft_radix2_q31(
2033   const arm_cfft_radix2_instance_q31 * S,
2034   q31_t * pSrc);
2035 
2036   /**
2037    * @brief Instance structure for the Radix-4 Q31 CFFT/CIFFT function.
2038    */
2039 
2040   typedef struct
2041   {
2042     uint16_t fftLen;                 /**< length of the FFT. */
2043     uint8_t ifftFlag;                /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2044     uint8_t bitReverseFlag;          /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2045     q31_t *pTwiddle;                 /**< points to the twiddle factor table. */
2046     uint16_t *pBitRevTable;          /**< points to the bit reversal table. */
2047     uint16_t twidCoefModifier;       /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2048     uint16_t bitRevFactor;           /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2049   } arm_cfft_radix4_instance_q31;
2050 
2051   /* Radix-4 Q31 CFFT/CIFFT processing function: S is const, pSrc is a writable buffer (presumably transformed in place; TODO confirm). */
2052   void arm_cfft_radix4_q31(
2053   const arm_cfft_radix4_instance_q31 * S,
2054   q31_t * pSrc);
2055   /* Init routine (going by its name) for the structure above; fftLen, ifftFlag and bitReverseFlag share names with its fields. Returns an arm_status. */
2056   arm_status arm_cfft_radix4_init_q31(
2057   arm_cfft_radix4_instance_q31 * S,
2058   uint16_t fftLen,
2059   uint8_t ifftFlag,
2060   uint8_t bitReverseFlag);
2061 
2062   /**
2063    * @brief Instance structure for the Radix-2 floating-point CFFT/CIFFT function.
2064    */
2065 
2066   typedef struct
2067   {
2068     uint16_t fftLen;                   /**< length of the FFT. */
2069     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2070     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2071     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2072     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2073     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2074     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2075     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2076   } arm_cfft_radix2_instance_f32;
2077 
2078 /* Deprecated init routine. arm_cfft_f32 (declared later in this header) is presumably the replacement; confirm in the CMSIS-DSP documentation. */
2079   arm_status arm_cfft_radix2_init_f32(
2080   arm_cfft_radix2_instance_f32 * S,
2081   uint16_t fftLen,
2082   uint8_t ifftFlag,
2083   uint8_t bitReverseFlag);
2084 
2085 /* Deprecated processing function: S is const, pSrc is a writable buffer (presumably transformed in place; TODO confirm). */
2086   void arm_cfft_radix2_f32(
2087   const arm_cfft_radix2_instance_f32 * S,
2088   float32_t * pSrc);
2089 
2090   /**
2091    * @brief Instance structure for the Radix-4 floating-point CFFT/CIFFT function.
2092    */
2093 
2094   typedef struct
2095   {
2096     uint16_t fftLen;                   /**< length of the FFT. */
2097     uint8_t ifftFlag;                  /**< flag that selects forward (ifftFlag=0) or inverse (ifftFlag=1) transform. */
2098     uint8_t bitReverseFlag;            /**< flag that enables (bitReverseFlag=1) or disables (bitReverseFlag=0) bit reversal of output. */
2099     float32_t *pTwiddle;               /**< points to the twiddle factor table. */
2100     uint16_t *pBitRevTable;            /**< points to the bit reversal table. */
2101     uint16_t twidCoefModifier;         /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2102     uint16_t bitRevFactor;             /**< bit reversal modifier that supports different size FFTs with the same bit reversal table. */
2103     float32_t onebyfftLen;                 /**< value of 1/fftLen. */
2104   } arm_cfft_radix4_instance_f32;
2105 
2106 /* Deprecated init routine. arm_cfft_f32 (declared later in this header) is presumably the replacement; confirm in the CMSIS-DSP documentation. */
2107   arm_status arm_cfft_radix4_init_f32(
2108   arm_cfft_radix4_instance_f32 * S,
2109   uint16_t fftLen,
2110   uint8_t ifftFlag,
2111   uint8_t bitReverseFlag);
2112 
2113 /* Deprecated processing function: S is const, pSrc is a writable buffer (presumably transformed in place; TODO confirm). */
2114   void arm_cfft_radix4_f32(
2115   const arm_cfft_radix4_instance_f32 * S,
2116   float32_t * pSrc);
2117 
2118   /**
2119    * @brief Instance structure for the floating-point CFFT/CIFFT function arm_cfft_f32. It holds only the length and const table pointers; ifftFlag and bitReverseFlag are arguments of arm_cfft_f32 rather than fields.
2120    */
2121 
2122   typedef struct
2123   {
2124     uint16_t fftLen;                   /**< length of the FFT. */
2125     const float32_t *pTwiddle;         /**< points to the twiddle factor table (read-only). */
2126     const uint16_t *pBitRevTable;      /**< points to the bit reversal table (read-only). */
2127     uint16_t bitRevLength;             /**< bit reversal table length. */
2128   } arm_cfft_instance_f32;
2129   /* Floating-point CFFT/CIFFT processing function: S is const, p1 is a writable buffer (presumably transformed in place; TODO confirm); flags are named like the radix-2/radix-4 instance fields. */
2130   void arm_cfft_f32(
2131   const arm_cfft_instance_f32 * S,
2132   float32_t * p1,
2133   uint8_t ifftFlag,
2134   uint8_t bitReverseFlag);
2135 
2136   /**
2137    * @brief Instance structure for the Q15 RFFT/RIFFT function.
2138    */
2139 
2140   typedef struct
2141   {
2142     uint32_t fftLenReal;                      /**< length of the real FFT. */
2143     uint32_t fftLenBy2;                       /**< length of the complex FFT. */
2144     uint8_t ifftFlagR;                        /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2145     uint8_t bitReverseFlagR;                      /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2146     uint32_t twidCoefRModifier;               /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2147     q15_t *pTwiddleAReal;                     /**< points to the real twiddle factor table. */
2148     q15_t *pTwiddleBReal;                     /**< points to the imag twiddle factor table. */
2149     arm_cfft_radix4_instance_q15 *pCfft;          /**< points to the complex FFT instance (radix-4 Q15 type). */
2150   } arm_rfft_instance_q15;
2151   /* Init routine (going by its name): S_CFFT has the type of the pCfft field; the two flags are passed as uint32_t although the ifftFlagR/bitReverseFlagR fields are uint8_t. Returns an arm_status. */
2152   arm_status arm_rfft_init_q15(
2153   arm_rfft_instance_q15 * S,
2154   arm_cfft_radix4_instance_q15 * S_CFFT,
2155   uint32_t fftLenReal,
2156   uint32_t ifftFlagR,
2157   uint32_t bitReverseFlag);
2158   /* Q15 RFFT/RIFFT processing function: S is const; pSrc and pDst are separate, non-const buffers. */
2159   void arm_rfft_q15(
2160   const arm_rfft_instance_q15 * S,
2161   q15_t * pSrc,
2162   q15_t * pDst);
2163 
2164   /**
2165    * @brief Instance structure for the Q31 RFFT/RIFFT function.
2166    */
2167 
2168   typedef struct
2169   {
2170     uint32_t fftLenReal;                        /**< length of the real FFT. */
2171     uint32_t fftLenBy2;                         /**< length of the complex FFT. */
2172     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2173     uint8_t bitReverseFlagR;                        /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2174     uint32_t twidCoefRModifier;                 /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2175     q31_t *pTwiddleAReal;                       /**< points to the real twiddle factor table. */
2176     q31_t *pTwiddleBReal;                       /**< points to the imag twiddle factor table. */
2177     arm_cfft_radix4_instance_q31 *pCfft;        /**< points to the complex FFT instance (radix-4 Q31 type). */
2178   } arm_rfft_instance_q31;
2179   /* Init routine (going by its name): S_CFFT has the type of the pCfft field; the two flags are passed as uint32_t although the ifftFlagR/bitReverseFlagR fields are uint8_t. Returns an arm_status. */
2180   arm_status arm_rfft_init_q31(
2181   arm_rfft_instance_q31 * S,
2182   arm_cfft_radix4_instance_q31 * S_CFFT,
2183   uint32_t fftLenReal,
2184   uint32_t ifftFlagR,
2185   uint32_t bitReverseFlag);
2186   /* Q31 RFFT/RIFFT processing function: S is const; pSrc and pDst are separate, non-const buffers. */
2187   void arm_rfft_q31(
2188   const arm_rfft_instance_q31 * S,
2189   q31_t * pSrc,
2190   q31_t * pDst);
2191 
2192   /**
2193    * @brief Instance structure for the floating-point RFFT/RIFFT function.
2194    */
2195 
2196   typedef struct
2197   {
2198     uint32_t fftLenReal;                        /**< length of the real FFT. */
2199     uint16_t fftLenBy2;                         /**< length of the complex FFT (uint16_t here; the Q15 and Q31 instances use uint32_t). */
2200     uint8_t ifftFlagR;                          /**< flag that selects forward (ifftFlagR=0) or inverse (ifftFlagR=1) transform. */
2201     uint8_t bitReverseFlagR;                    /**< flag that enables (bitReverseFlagR=1) or disables (bitReverseFlagR=0) bit reversal of output. */
2202     uint32_t twidCoefRModifier;                     /**< twiddle coefficient modifier that supports different size FFTs with the same twiddle factor table. */
2203     float32_t *pTwiddleAReal;                   /**< points to the real twiddle factor table. */
2204     float32_t *pTwiddleBReal;                   /**< points to the imag twiddle factor table. */
2205     arm_cfft_radix4_instance_f32 *pCfft;        /**< points to the complex FFT instance (radix-4 floating-point type). */
2206   } arm_rfft_instance_f32;
2207   /* Init routine (going by its name): S_CFFT has the type of the pCfft field; the two flags are passed as uint32_t although the ifftFlagR/bitReverseFlagR fields are uint8_t. Returns an arm_status. */
2208   arm_status arm_rfft_init_f32(
2209   arm_rfft_instance_f32 * S,
2210   arm_cfft_radix4_instance_f32 * S_CFFT,
2211   uint32_t fftLenReal,
2212   uint32_t ifftFlagR,
2213   uint32_t bitReverseFlag);
2214   /* Floating-point RFFT/RIFFT processing function: S is const; pSrc and pDst are separate, non-const buffers. */
2215   void arm_rfft_f32(
2216   const arm_rfft_instance_f32 * S,
2217   float32_t * pSrc,
2218   float32_t * pDst);
2219 
2220   /**
2221    * @brief Instance structure for the fast floating-point RFFT/RIFFT function (arm_rfft_fast_f32). It embeds an arm_cfft_instance_f32 by value rather than pointing to a separate CFFT instance.
2222    */
2223 
2224 typedef struct
2225   {
2226     arm_cfft_instance_f32 Sint;      /**< internal CFFT structure. */
2227     uint16_t fftLenRFFT;                        /**< length of the real sequence. */
2228 	float32_t * pTwiddleRFFT;					/**< twiddle factors for the real stage. */
2229   } arm_rfft_fast_instance_f32 ;
2230   /* Init routine (going by its name); takes only the instance and fftLen. Returns an arm_status. */
2231 arm_status arm_rfft_fast_init_f32 (
2232 	arm_rfft_fast_instance_f32 * S,
2233 	uint16_t fftLen);
2234   /* Processing function: unlike the other RFFT functions S is not const here; p and pOut are separate buffers and ifftFlag is passed per call. */
2235 void arm_rfft_fast_f32(
2236   arm_rfft_fast_instance_f32 * S,
2237   float32_t * p, float32_t * pOut,
2238   uint8_t ifftFlag);
2239 
2240   /**
2241    * @brief Instance structure for the floating-point DCT4/IDCT4 function (passed to arm_dct4_init_f32 and arm_dct4_f32).
2242    */
2243 
2244   typedef struct
2245   {
2246     uint16_t N;                         /**< length of the DCT4. */
2247     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2248     float32_t normalize;                /**< normalizing factor. */
2249     float32_t *pTwiddle;                /**< points to the twiddle factor table. */
2250     float32_t *pCosFactor;              /**< points to the cosFactor table. */
2251     arm_rfft_instance_f32 *pRfft;        /**< points to the real FFT instance. */
2252     arm_cfft_radix4_instance_f32 *pCfft; /**< points to the complex FFT instance (radix-4 type). */
2253   } arm_dct4_instance_f32;
2254 
2255   /**
2256    * @brief  Initialization function for the floating-point DCT4/IDCT4.
2257    * @param[in,out] S          points to an instance of floating-point DCT4/IDCT4 structure.
2258    * @param[in]     S_RFFT     points to an instance of floating-point RFFT/RIFFT structure.
2259    * @param[in]     S_CFFT     points to an instance of floating-point CFFT/CIFFT structure.
2260    * @param[in]     N          length of the DCT4.
2261    * @param[in]     Nby2       half of the length of the DCT4.
2262    * @param[in]     normalize  normalizing factor.
2263    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2264    */
2265 
2266   arm_status arm_dct4_init_f32(
2267   arm_dct4_instance_f32 * S,
2268   arm_rfft_instance_f32 * S_RFFT,
2269   arm_cfft_radix4_instance_f32 * S_CFFT,
2270   uint16_t N,
2271   uint16_t Nby2,
2272   float32_t normalize);
2273 
2274   /**
2275    * @brief Processing function for the floating-point DCT4/IDCT4.
2276    * @param[in]       S              points to an instance of the floating-point DCT4/IDCT4 structure.
2277    * @param[in]       pState         points to state buffer.
2278    * @param[in,out]   pInlineBuffer  points to the in-place input and output buffer.
2279    * @return none.
2280    */
2281 
2282   void arm_dct4_f32(
2283   const arm_dct4_instance_f32 * S,
2284   float32_t * pState,
2285   float32_t * pInlineBuffer);
2286 
2287   /**
2288    * @brief Instance structure for the Q31 DCT4/IDCT4 function (passed to arm_dct4_init_q31 and arm_dct4_q31).
2289    */
2290 
2291   typedef struct
2292   {
2293     uint16_t N;                         /**< length of the DCT4. */
2294     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2295     q31_t normalize;                    /**< normalizing factor. */
2296     q31_t *pTwiddle;                    /**< points to the twiddle factor table. */
2297     q31_t *pCosFactor;                  /**< points to the cosFactor table. */
2298     arm_rfft_instance_q31 *pRfft;        /**< points to the real FFT instance. */
2299     arm_cfft_radix4_instance_q31 *pCfft; /**< points to the complex FFT instance (radix-4 type). */
2300   } arm_dct4_instance_q31;
2301 
2302   /**
2303    * @brief  Initialization function for the Q31 DCT4/IDCT4.
2304    * @param[in,out] S          points to an instance of Q31 DCT4/IDCT4 structure.
2305    * @param[in]     S_RFFT     points to an instance of Q31 RFFT/RIFFT structure.
2306    * @param[in]     S_CFFT     points to an instance of Q31 CFFT/CIFFT structure.
2307    * @param[in]     N          length of the DCT4.
2308    * @param[in]     Nby2       half of the length of the DCT4.
2309    * @param[in]     normalize  normalizing factor.
2310    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2311    */
2312 
2313   arm_status arm_dct4_init_q31(
2314   arm_dct4_instance_q31 * S,
2315   arm_rfft_instance_q31 * S_RFFT,
2316   arm_cfft_radix4_instance_q31 * S_CFFT,
2317   uint16_t N,
2318   uint16_t Nby2,
2319   q31_t normalize);
2320 
2321   /**
2322    * @brief Processing function for the Q31 DCT4/IDCT4.
2323    * @param[in]       S              points to an instance of the Q31 DCT4 structure.
2324    * @param[in]       pState         points to state buffer.
2325    * @param[in,out]   pInlineBuffer  points to the in-place input and output buffer.
2326    * @return none.
2327    */
2328 
2329   void arm_dct4_q31(
2330   const arm_dct4_instance_q31 * S,
2331   q31_t * pState,
2332   q31_t * pInlineBuffer);
2333 
2334   /**
2335    * @brief Instance structure for the Q15 DCT4/IDCT4 function (passed to arm_dct4_init_q15 and arm_dct4_q15).
2336    */
2337 
2338   typedef struct
2339   {
2340     uint16_t N;                         /**< length of the DCT4. */
2341     uint16_t Nby2;                      /**< half of the length of the DCT4. */
2342     q15_t normalize;                    /**< normalizing factor. */
2343     q15_t *pTwiddle;                    /**< points to the twiddle factor table. */
2344     q15_t *pCosFactor;                  /**< points to the cosFactor table. */
2345     arm_rfft_instance_q15 *pRfft;        /**< points to the real FFT instance. */
2346     arm_cfft_radix4_instance_q15 *pCfft; /**< points to the complex FFT instance (radix-4 type). */
2347   } arm_dct4_instance_q15;
2348 
2349   /**
2350    * @brief  Initialization function for the Q15 DCT4/IDCT4.
2351    * @param[in,out] S          points to an instance of Q15 DCT4/IDCT4 structure.
2352    * @param[in]     S_RFFT     points to an instance of Q15 RFFT/RIFFT structure.
2353    * @param[in]     S_CFFT     points to an instance of Q15 CFFT/CIFFT structure.
2354    * @param[in]     N          length of the DCT4.
2355    * @param[in]     Nby2       half of the length of the DCT4.
2356    * @param[in]     normalize  normalizing factor.
2357    * @return        arm_status function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_ARGUMENT_ERROR if <code>N</code> is not a supported transform length.
2358    */
2359 
2360   arm_status arm_dct4_init_q15(
2361   arm_dct4_instance_q15 * S,
2362   arm_rfft_instance_q15 * S_RFFT,
2363   arm_cfft_radix4_instance_q15 * S_CFFT,
2364   uint16_t N,
2365   uint16_t Nby2,
2366   q15_t normalize);
2367 
2368   /**
2369    * @brief Processing function for the Q15 DCT4/IDCT4.
2370    * @param[in]       S              points to an instance of the Q15 DCT4 structure.
2371    * @param[in]       pState         points to state buffer.
2372    * @param[in,out]   pInlineBuffer  points to the in-place input and output buffer.
2373    * @return none.
2374    */
2375 
2376   void arm_dct4_q15(
2377   const arm_dct4_instance_q15 * S,
2378   q15_t * pState,
2379   q15_t * pInlineBuffer);
2380 
2381   /**
2382    * @brief Floating-point vector addition.
2383    * @param[in]       pSrcA      points to the first input vector.
2384    * @param[in]       pSrcB      points to the second input vector.
2385    * @param[out]      pDst       points to the output vector.
2386    * @param[in]       blockSize  number of samples in each vector.
2387    * @return none.
2388    */
2389 
2390   void arm_add_f32(
2391   float32_t * pSrcA,
2392   float32_t * pSrcB,
2393   float32_t * pDst,
2394   uint32_t blockSize);
2395 
2396   /**
2397    * @brief Q7 vector addition.
2398    * @param[in]       pSrcA      points to the first input vector.
2399    * @param[in]       pSrcB      points to the second input vector.
2400    * @param[out]      pDst       points to the output vector.
2401    * @param[in]       blockSize  number of samples in each vector.
2402    * @return none.
2403    */
2404 
2405   void arm_add_q7(
2406   q7_t * pSrcA,
2407   q7_t * pSrcB,
2408   q7_t * pDst,
2409   uint32_t blockSize);
2410 
2411   /**
2412    * @brief Q15 vector addition.
2413    * @param[in]       pSrcA      points to the first input vector.
2414    * @param[in]       pSrcB      points to the second input vector.
2415    * @param[out]      pDst       points to the output vector.
2416    * @param[in]       blockSize  number of samples in each vector.
2417    * @return none.
2418    */
2419 
2420   void arm_add_q15(
2421   q15_t * pSrcA,
2422   q15_t * pSrcB,
2423   q15_t * pDst,
2424   uint32_t blockSize);
2425 
2426   /**
2427    * @brief Q31 vector addition.
2428    * @param[in]       pSrcA      points to the first input vector.
2429    * @param[in]       pSrcB      points to the second input vector.
2430    * @param[out]      pDst       points to the output vector.
2431    * @param[in]       blockSize  number of samples in each vector.
2432    * @return none.
2433    */
2434 
2435   void arm_add_q31(
2436   q31_t * pSrcA,
2437   q31_t * pSrcB,
2438   q31_t * pDst,
2439   uint32_t blockSize);
2440 
2441   /**
2442    * @brief Floating-point vector subtraction.
2443    * @param[in]       pSrcA      points to the first input vector.
2444    * @param[in]       pSrcB      points to the second input vector.
2445    * @param[out]      pDst       points to the output vector.
2446    * @param[in]       blockSize  number of samples in each vector.
2447    * @return none.
2448    */
2449 
2450   void arm_sub_f32(
2451   float32_t * pSrcA,
2452   float32_t * pSrcB,
2453   float32_t * pDst,
2454   uint32_t blockSize);
2455 
2456   /**
2457    * @brief Q7 vector subtraction.
2458    * @param[in]       pSrcA      points to the first input vector.
2459    * @param[in]       pSrcB      points to the second input vector.
2460    * @param[out]      pDst       points to the output vector.
2461    * @param[in]       blockSize  number of samples in each vector.
2462    * @return none.
2463    */
2464 
2465   void arm_sub_q7(
2466   q7_t * pSrcA,
2467   q7_t * pSrcB,
2468   q7_t * pDst,
2469   uint32_t blockSize);
2470 
2471   /**
2472    * @brief Q15 vector subtraction.
2473    * @param[in]       pSrcA      points to the first input vector.
2474    * @param[in]       pSrcB      points to the second input vector.
2475    * @param[out]      pDst       points to the output vector.
2476    * @param[in]       blockSize  number of samples in each vector.
2477    * @return none.
2478    */
2479 
2480   void arm_sub_q15(
2481   q15_t * pSrcA,
2482   q15_t * pSrcB,
2483   q15_t * pDst,
2484   uint32_t blockSize);
2485 
2486   /**
2487    * @brief Q31 vector subtraction.
2488    * @param[in]       pSrcA      points to the first input vector.
2489    * @param[in]       pSrcB      points to the second input vector.
2490    * @param[out]      pDst       points to the output vector.
2491    * @param[in]       blockSize  number of samples in each vector.
2492    * @return none.
2493    */
2494 
2495   void arm_sub_q31(
2496   q31_t * pSrcA,
2497   q31_t * pSrcB,
2498   q31_t * pDst,
2499   uint32_t blockSize);
2500 
2501   /**
2502    * @brief Multiplies a floating-point vector by a scalar.
2503    * @param[in]       pSrc       points to the input vector.
2504    * @param[in]       scale      scale factor to be applied.
2505    * @param[out]      pDst       points to the output vector.
2506    * @param[in]       blockSize  number of samples in the vector.
2507    * @return none.
2508    */
2509 
2510   void arm_scale_f32(
2511   float32_t * pSrc,
2512   float32_t scale,
2513   float32_t * pDst,
2514   uint32_t blockSize);
2515 
2516   /**
2517    * @brief Multiplies a Q7 vector by a scalar.
2518    * @param[in]       pSrc        points to the input vector.
2519    * @param[in]       scaleFract  fractional portion of the scale value.
2520    * @param[in]       shift       number of bits to shift the result by.
2521    * @param[out]      pDst        points to the output vector.
2522    * @param[in]       blockSize   number of samples in the vector.
2523    * @return none.
2524    */
2525 
2526   void arm_scale_q7(
2527   q7_t * pSrc,
2528   q7_t scaleFract,
2529   int8_t shift,
2530   q7_t * pDst,
2531   uint32_t blockSize);
2532 
2533   /**
2534    * @brief Multiplies a Q15 vector by a scalar.
2535    * @param[in]       pSrc        points to the input vector.
2536    * @param[in]       scaleFract  fractional portion of the scale value.
2537    * @param[in]       shift       number of bits to shift the result by.
2538    * @param[out]      pDst        points to the output vector.
2539    * @param[in]       blockSize   number of samples in the vector.
2540    * @return none.
2541    */
2542 
2543   void arm_scale_q15(
2544   q15_t * pSrc,
2545   q15_t scaleFract,
2546   int8_t shift,
2547   q15_t * pDst,
2548   uint32_t blockSize);
2549 
2550   /**
2551    * @brief Multiplies a Q31 vector by a scalar.
2552    * @param[in]       pSrc        points to the input vector.
2553    * @param[in]       scaleFract  fractional portion of the scale value.
2554    * @param[in]       shift       number of bits to shift the result by.
2555    * @param[out]      pDst        points to the output vector.
2556    * @param[in]       blockSize   number of samples in the vector.
2557    * @return none.
2558    */
2559 
2560   void arm_scale_q31(
2561   q31_t * pSrc,
2562   q31_t scaleFract,
2563   int8_t shift,
2564   q31_t * pDst,
2565   uint32_t blockSize);
2566 
2567   /**
2568    * @brief Q7 vector absolute value.
2569    * @param[in]       pSrc       points to the input buffer.
2570    * @param[out]      pDst       points to the output buffer.
2571    * @param[in]       blockSize  number of samples in each vector.
2572    * @return none.
2573    */
2574 
2575   void arm_abs_q7(
2576   q7_t * pSrc,
2577   q7_t * pDst,
2578   uint32_t blockSize);
2579 
2580   /**
2581    * @brief Floating-point vector absolute value.
2582    * @param[in]       pSrc       points to the input buffer.
2583    * @param[out]      pDst       points to the output buffer.
2584    * @param[in]       blockSize  number of samples in each vector.
2585    * @return none.
2586    */
2587 
2588   void arm_abs_f32(
2589   float32_t * pSrc,
2590   float32_t * pDst,
2591   uint32_t blockSize);
2592 
2593   /**
2594    * @brief Q15 vector absolute value.
2595    * @param[in]       pSrc       points to the input buffer.
2596    * @param[out]      pDst       points to the output buffer.
2597    * @param[in]       blockSize  number of samples in each vector.
2598    * @return none.
2599    */
2600 
2601   void arm_abs_q15(
2602   q15_t * pSrc,
2603   q15_t * pDst,
2604   uint32_t blockSize);
2605 
2606   /**
2607    * @brief Q31 vector absolute value.
2608    * @param[in]       pSrc       points to the input buffer.
2609    * @param[out]      pDst       points to the output buffer.
2610    * @param[in]       blockSize  number of samples in each vector.
2611    * @return none.
2612    */
2613 
2614   void arm_abs_q31(
2615   q31_t * pSrc,
2616   q31_t * pDst,
2617   uint32_t blockSize);
2618 
2619   /**
2620    * @brief Dot product of floating-point vectors.
2621    * @param[in]       pSrcA      points to the first input vector.
2622    * @param[in]       pSrcB      points to the second input vector.
2623    * @param[in]       blockSize  number of samples in each vector.
2624    * @param[out]      result     output result returned here (float32_t).
2625    * @return none.
2626    */
2627 
2628   void arm_dot_prod_f32(
2629   float32_t * pSrcA,
2630   float32_t * pSrcB,
2631   uint32_t blockSize,
2632   float32_t * result);
2633 
2634   /**
2635    * @brief Dot product of Q7 vectors.
2636    * @param[in]       pSrcA      points to the first input vector.
2637    * @param[in]       pSrcB      points to the second input vector.
2638    * @param[in]       blockSize  number of samples in each vector.
2639    * @param[out]      result     output result returned here (stored as q31_t, wider than the q7_t inputs).
2640    * @return none.
2641    */
2642 
2643   void arm_dot_prod_q7(
2644   q7_t * pSrcA,
2645   q7_t * pSrcB,
2646   uint32_t blockSize,
2647   q31_t * result);
2648 
2649   /**
2650    * @brief Dot product of Q15 vectors.
2651    * @param[in]       pSrcA      points to the first input vector.
2652    * @param[in]       pSrcB      points to the second input vector.
2653    * @param[in]       blockSize  number of samples in each vector.
2654    * @param[out]      result     output result returned here (stored as q63_t, wider than the q15_t inputs).
2655    * @return none.
2656    */
2657 
2658   void arm_dot_prod_q15(
2659   q15_t * pSrcA,
2660   q15_t * pSrcB,
2661   uint32_t blockSize,
2662   q63_t * result);
2663 
2664   /**
2665    * @brief Dot product of Q31 vectors.
2666    * @param[in]       *pSrcA points to the first input vector (q31_t samples; not const-qualified)
2667    * @param[in]       *pSrcB points to the second input vector (q31_t samples; not const-qualified)
2668    * @param[in]       blockSize number of samples in each vector
2669    * @param[out]      *result points to the location where the result is returned; note the result type is q63_t, not q31_t
2670    * @return none.
2671    */
2672 
2673   void arm_dot_prod_q31(
2674   q31_t * pSrcA,
2675   q31_t * pSrcB,
2676   uint32_t blockSize,
2677   q63_t * result);
2678 
2679   /**
2680    * @brief  Shifts the elements of a Q7 vector a specified number of bits.
2681    * @param[in]  *pSrc points to the input vector (q7_t samples; not const-qualified in this prototype)
2682    * @param[in]  shiftBits signed number of bits to shift (int8_t). A positive value shifts left; a negative value shifts right.
2683    * @param[out]  *pDst points to the output vector (q7_t samples)
2684    * @param[in]  blockSize number of samples in the vector
2685    * @return none.
2686    */
2687 
2688   void arm_shift_q7(
2689   q7_t * pSrc,
2690   int8_t shiftBits,
2691   q7_t * pDst,
2692   uint32_t blockSize);
2693 
2694   /**
2695    * @brief  Shifts the elements of a Q15 vector a specified number of bits.
2696    * @param[in]  *pSrc points to the input vector (q15_t samples; not const-qualified in this prototype)
2697    * @param[in]  shiftBits signed number of bits to shift (int8_t). A positive value shifts left; a negative value shifts right.
2698    * @param[out]  *pDst points to the output vector (q15_t samples)
2699    * @param[in]  blockSize number of samples in the vector
2700    * @return none.
2701    */
2702 
2703   void arm_shift_q15(
2704   q15_t * pSrc,
2705   int8_t shiftBits,
2706   q15_t * pDst,
2707   uint32_t blockSize);
2708 
2709   /**
2710    * @brief  Shifts the elements of a Q31 vector a specified number of bits.
2711    * @param[in]  *pSrc points to the input vector (q31_t samples; not const-qualified in this prototype)
2712    * @param[in]  shiftBits signed number of bits to shift (int8_t). A positive value shifts left; a negative value shifts right.
2713    * @param[out]  *pDst points to the output vector (q31_t samples)
2714    * @param[in]  blockSize number of samples in the vector
2715    * @return none.
2716    */
2717 
2718   void arm_shift_q31(
2719   q31_t * pSrc,
2720   int8_t shiftBits,
2721   q31_t * pDst,
2722   uint32_t blockSize);
2723 
2724   /**
2725    * @brief  Adds a constant offset to a floating-point vector.
2726    * @param[in]  *pSrc points to the input vector (float32_t samples; not const-qualified in this prototype)
2727    * @param[in]  offset is the offset to be added (float32_t, passed by value)
2728    * @param[out]  *pDst points to the output vector (float32_t samples)
2729    * @param[in]  blockSize number of samples in the vector
2730    * @return none.
2731    */
2732 
2733   void arm_offset_f32(
2734   float32_t * pSrc,
2735   float32_t offset,
2736   float32_t * pDst,
2737   uint32_t blockSize);
2738 
2739   /**
2740    * @brief  Adds a constant offset to a Q7 vector.
2741    * @param[in]  *pSrc points to the input vector (q7_t samples; not const-qualified in this prototype)
2742    * @param[in]  offset is the offset to be added (q7_t, passed by value)
2743    * @param[out]  *pDst points to the output vector (q7_t samples)
2744    * @param[in]  blockSize number of samples in the vector
2745    * @return none.
2746    */
2747 
2748   void arm_offset_q7(
2749   q7_t * pSrc,
2750   q7_t offset,
2751   q7_t * pDst,
2752   uint32_t blockSize);
2753 
2754   /**
2755    * @brief  Adds a constant offset to a Q15 vector.
2756    * @param[in]  *pSrc points to the input vector (q15_t samples; not const-qualified in this prototype)
2757    * @param[in]  offset is the offset to be added (q15_t, passed by value)
2758    * @param[out]  *pDst points to the output vector (q15_t samples)
2759    * @param[in]  blockSize number of samples in the vector
2760    * @return none.
2761    */
2762 
2763   void arm_offset_q15(
2764   q15_t * pSrc,
2765   q15_t offset,
2766   q15_t * pDst,
2767   uint32_t blockSize);
2768 
2769   /**
2770    * @brief  Adds a constant offset to a Q31 vector.
2771    * @param[in]  *pSrc points to the input vector (q31_t samples; not const-qualified in this prototype)
2772    * @param[in]  offset is the offset to be added (q31_t, passed by value)
2773    * @param[out]  *pDst points to the output vector (q31_t samples)
2774    * @param[in]  blockSize number of samples in the vector
2775    * @return none.
2776    */
2777 
2778   void arm_offset_q31(
2779   q31_t * pSrc,
2780   q31_t offset,
2781   q31_t * pDst,
2782   uint32_t blockSize);
2783 
2784   /**
2785    * @brief  Negates the elements of a floating-point vector.
2786    * @param[in]  *pSrc points to the input vector (float32_t samples; not const-qualified in this prototype)
2787    * @param[out]  *pDst points to the output vector (float32_t samples)
2788    * @param[in]  blockSize number of samples in the vector
2789    * @return none.
2790    */
2791 
2792   void arm_negate_f32(
2793   float32_t * pSrc,
2794   float32_t * pDst,
2795   uint32_t blockSize);
2796 
2797   /**
2798    * @brief  Negates the elements of a Q7 vector.
2799    * @param[in]  *pSrc points to the input vector (q7_t samples; not const-qualified in this prototype)
2800    * @param[out]  *pDst points to the output vector (q7_t samples)
2801    * @param[in]  blockSize number of samples in the vector
2802    * @return none.
2803    */
2804 
2805   void arm_negate_q7(
2806   q7_t * pSrc,
2807   q7_t * pDst,
2808   uint32_t blockSize);
2809 
2810   /**
2811    * @brief  Negates the elements of a Q15 vector.
2812    * @param[in]  *pSrc points to the input vector (q15_t samples; not const-qualified in this prototype)
2813    * @param[out]  *pDst points to the output vector (q15_t samples)
2814    * @param[in]  blockSize number of samples in the vector
2815    * @return none.
2816    */
2817 
2818   void arm_negate_q15(
2819   q15_t * pSrc,
2820   q15_t * pDst,
2821   uint32_t blockSize);
2822 
2823   /**
2824    * @brief  Negates the elements of a Q31 vector.
2825    * @param[in]  *pSrc points to the input vector (q31_t samples; not const-qualified in this prototype)
2826    * @param[out]  *pDst points to the output vector (q31_t samples)
2827    * @param[in]  blockSize number of samples in the vector
2828    * @return none.
2829    */
2830 
2831   void arm_negate_q31(
2832   q31_t * pSrc,
2833   q31_t * pDst,
2834   uint32_t blockSize);
2835   /**
2836    * @brief  Copies the elements of a floating-point vector.
2837    * @param[in]  *pSrc points to the input vector (float32_t samples; not const-qualified in this prototype)
2838    * @param[out]  *pDst points to the output vector (float32_t samples)
2839    * @param[in]  blockSize number of samples to process
2840    * @return none.
2841    */
2842   void arm_copy_f32(
2843   float32_t * pSrc,
2844   float32_t * pDst,
2845   uint32_t blockSize);
2846 
2847   /**
2848    * @brief  Copies the elements of a Q7 vector.
2849    * @param[in]  *pSrc points to the input vector (q7_t samples; not const-qualified in this prototype)
2850    * @param[out]  *pDst points to the output vector (q7_t samples)
2851    * @param[in]  blockSize number of samples to process
2852    * @return none.
2853    */
2854   void arm_copy_q7(
2855   q7_t * pSrc,
2856   q7_t * pDst,
2857   uint32_t blockSize);
2858 
2859   /**
2860    * @brief  Copies the elements of a Q15 vector.
2861    * @param[in]  *pSrc points to the input vector (q15_t samples; not const-qualified in this prototype)
2862    * @param[out]  *pDst points to the output vector (q15_t samples)
2863    * @param[in]  blockSize number of samples to process
2864    * @return none.
2865    */
2866   void arm_copy_q15(
2867   q15_t * pSrc,
2868   q15_t * pDst,
2869   uint32_t blockSize);
2870 
2871   /**
2872    * @brief  Copies the elements of a Q31 vector.
2873    * @param[in]  *pSrc points to the input vector (q31_t samples; not const-qualified in this prototype)
2874    * @param[out]  *pDst points to the output vector (q31_t samples)
2875    * @param[in]  blockSize number of samples to process
2876    * @return none.
2877    */
2878   void arm_copy_q31(
2879   q31_t * pSrc,
2880   q31_t * pDst,
2881   uint32_t blockSize);
2882   /**
2883    * @brief  Fills a constant value into a floating-point vector.
2884    * @param[in]  value input value to be filled (float32_t, passed by value)
2885    * @param[out]  *pDst points to the output vector (float32_t samples)
2886    * @param[in]  blockSize number of samples to process
2887    * @return none.
2888    */
2889   void arm_fill_f32(
2890   float32_t value,
2891   float32_t * pDst,
2892   uint32_t blockSize);
2893 
2894   /**
2895    * @brief  Fills a constant value into a Q7 vector.
2896    * @param[in]  value input value to be filled (q7_t, passed by value)
2897    * @param[out]  *pDst points to the output vector (q7_t samples)
2898    * @param[in]  blockSize number of samples to process
2899    * @return none.
2900    */
2901   void arm_fill_q7(
2902   q7_t value,
2903   q7_t * pDst,
2904   uint32_t blockSize);
2905 
2906   /**
2907    * @brief  Fills a constant value into a Q15 vector.
2908    * @param[in]  value input value to be filled (q15_t, passed by value)
2909    * @param[out]  *pDst points to the output vector (q15_t samples)
2910    * @param[in]  blockSize number of samples to process
2911    * @return none.
2912    */
2913   void arm_fill_q15(
2914   q15_t value,
2915   q15_t * pDst,
2916   uint32_t blockSize);
2917 
2918   /**
2919    * @brief  Fills a constant value into a Q31 vector.
2920    * @param[in]  value input value to be filled (q31_t, passed by value)
2921    * @param[out]  *pDst points to the output vector (q31_t samples)
2922    * @param[in]  blockSize number of samples to process
2923    * @return none.
2924    */
2925   void arm_fill_q31(
2926   q31_t value,
2927   q31_t * pDst,
2928   uint32_t blockSize);
2929 
2930 /**
2931  * @brief Convolution of floating-point sequences.
2932  * @param[in] *pSrcA points to the first input sequence (float32_t samples; not const-qualified in this prototype).
2933  * @param[in] srcALen length of the first input sequence.
2934  * @param[in] *pSrcB points to the second input sequence (float32_t samples; not const-qualified in this prototype).
2935  * @param[in] srcBLen length of the second input sequence.
2936  * @param[out] *pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
2937  * @return none.
2938  */
2939 
2940   void arm_conv_f32(
2941   float32_t * pSrcA,
2942   uint32_t srcALen,
2943   float32_t * pSrcB,
2944   uint32_t srcBLen,
2945   float32_t * pDst);
2946 
2947 
2948   /**
2949    * @brief Convolution of Q15 sequences (variant that takes caller-supplied scratch buffers).
2950    * @param[in] *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
2951    * @param[in] srcALen length of the first input sequence.
2952    * @param[in] *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
2953    * @param[in] srcBLen length of the second input sequence.
2954    * @param[out] *pDst points to the block of output data. Length srcALen+srcBLen-1.
2955    * @param[in]  *pScratch1 points to scratch buffer (q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
2956    * @param[in]  *pScratch2 points to scratch buffer (q15_t) of size min(srcALen, srcBLen).
2957    * @return none.
2958    */
2959 
2960 
2961   void arm_conv_opt_q15(
2962   q15_t * pSrcA,
2963   uint32_t srcALen,
2964   q15_t * pSrcB,
2965   uint32_t srcBLen,
2966   q15_t * pDst,
2967   q15_t * pScratch1,
2968   q15_t * pScratch2);
2969 
2970 
2971 /**
2972  * @brief Convolution of Q15 sequences.
2973  * @param[in] *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
2974  * @param[in] srcALen length of the first input sequence.
2975  * @param[in] *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
2976  * @param[in] srcBLen length of the second input sequence.
2977  * @param[out] *pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
2978  * @return none.
2979  */
2980 
2981   void arm_conv_q15(
2982   q15_t * pSrcA,
2983   uint32_t srcALen,
2984   q15_t * pSrcB,
2985   uint32_t srcBLen,
2986   q15_t * pDst);
2987 
2988   /**
2989    * @brief Convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
2990    * @param[in] *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
2991    * @param[in] srcALen length of the first input sequence.
2992    * @param[in] *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
2993    * @param[in] srcBLen length of the second input sequence.
2994    * @param[out] *pDst points to the block of output data. Length srcALen+srcBLen-1.
2995    * @return none.
2996    */
2997 
2998   void arm_conv_fast_q15(
2999 			  q15_t * pSrcA,
3000 			 uint32_t srcALen,
3001 			  q15_t * pSrcB,
3002 			 uint32_t srcBLen,
3003 			 q15_t * pDst);
3004 
3005   /**
3006    * @brief Convolution of Q15 sequences (fast version with caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
3007    * @param[in] *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
3008    * @param[in] srcALen length of the first input sequence.
3009    * @param[in] *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
3010    * @param[in] srcBLen length of the second input sequence.
3011    * @param[out] *pDst points to the block of output data. Length srcALen+srcBLen-1.
3012    * @param[in]  *pScratch1 points to scratch buffer (q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3013    * @param[in]  *pScratch2 points to scratch buffer (q15_t) of size min(srcALen, srcBLen).
3014    * @return none.
3015    */
3016 
3017   void arm_conv_fast_opt_q15(
3018   q15_t * pSrcA,
3019   uint32_t srcALen,
3020   q15_t * pSrcB,
3021   uint32_t srcBLen,
3022   q15_t * pDst,
3023   q15_t * pScratch1,
3024   q15_t * pScratch2);
3025 
3026 
3027 
3028   /**
3029    * @brief Convolution of Q31 sequences.
3030    * @param[in] *pSrcA points to the first input sequence (q31_t samples; not const-qualified in this prototype).
3031    * @param[in] srcALen length of the first input sequence.
3032    * @param[in] *pSrcB points to the second input sequence (q31_t samples; not const-qualified in this prototype).
3033    * @param[in] srcBLen length of the second input sequence.
3034    * @param[out] *pDst points to the block of output data. Length srcALen+srcBLen-1.
3035    * @return none.
3036    */
3037 
3038   void arm_conv_q31(
3039   q31_t * pSrcA,
3040   uint32_t srcALen,
3041   q31_t * pSrcB,
3042   uint32_t srcBLen,
3043   q31_t * pDst);
3044 
3045   /**
3046    * @brief Convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
3047    * @param[in] *pSrcA points to the first input sequence (q31_t samples; not const-qualified in this prototype).
3048    * @param[in] srcALen length of the first input sequence.
3049    * @param[in] *pSrcB points to the second input sequence (q31_t samples; not const-qualified in this prototype).
3050    * @param[in] srcBLen length of the second input sequence.
3051    * @param[out] *pDst points to the block of output data. Length srcALen+srcBLen-1.
3052    * @return none.
3053    */
3054 
3055   void arm_conv_fast_q31(
3056   q31_t * pSrcA,
3057   uint32_t srcALen,
3058   q31_t * pSrcB,
3059   uint32_t srcBLen,
3060   q31_t * pDst);
3061 
3062 
3063     /**
3064    * @brief Convolution of Q7 sequences (variant that takes caller-supplied scratch buffers).
3065    * @param[in] *pSrcA points to the first input sequence (q7_t samples; not const-qualified in this prototype).
3066    * @param[in] srcALen length of the first input sequence.
3067    * @param[in] *pSrcB points to the second input sequence (q7_t samples; not const-qualified in this prototype).
3068    * @param[in] srcBLen length of the second input sequence.
3069    * @param[out] *pDst points to the block of output data. Length srcALen+srcBLen-1.
3070    * @param[in]  *pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3071    * @param[in]  *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3072    * @return none.
3073    */
3074 
3075   void arm_conv_opt_q7(
3076   q7_t * pSrcA,
3077   uint32_t srcALen,
3078   q7_t * pSrcB,
3079   uint32_t srcBLen,
3080   q7_t * pDst,
3081   q15_t * pScratch1,
3082   q15_t * pScratch2);
3083 
3084 
3085 
3086   /**
3087    * @brief Convolution of Q7 sequences.
3088    * @param[in] *pSrcA points to the first input sequence (q7_t samples; not const-qualified in this prototype).
3089    * @param[in] srcALen length of the first input sequence.
3090    * @param[in] *pSrcB points to the second input sequence (q7_t samples; not const-qualified in this prototype).
3091    * @param[in] srcBLen length of the second input sequence.
3092    * @param[out] *pDst points to the block of output data. Length srcALen+srcBLen-1.
3093    * @return none.
3094    */
3095 
3096   void arm_conv_q7(
3097   q7_t * pSrcA,
3098   uint32_t srcALen,
3099   q7_t * pSrcB,
3100   uint32_t srcBLen,
3101   q7_t * pDst);
3102 
3103 
3104   /**
3105    * @brief Partial convolution of floating-point sequences.
3106    * @param[in]       *pSrcA points to the first input sequence (float32_t samples; not const-qualified in this prototype).
3107    * @param[in]       srcALen length of the first input sequence.
3108    * @param[in]       *pSrcB points to the second input sequence (float32_t samples; not const-qualified in this prototype).
3109    * @param[in]       srcBLen length of the second input sequence.
3110    * @param[out]      *pDst points to the block of output data.
3111    * @param[in]       firstIndex is the first output sample to start with.
3112    * @param[in]       numPoints is the number of output points to be computed.
3113    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3114    */
3115 
3116   arm_status arm_conv_partial_f32(
3117   float32_t * pSrcA,
3118   uint32_t srcALen,
3119   float32_t * pSrcB,
3120   uint32_t srcBLen,
3121   float32_t * pDst,
3122   uint32_t firstIndex,
3123   uint32_t numPoints);
3124 
3125     /**
3126    * @brief Partial convolution of Q15 sequences (variant that takes caller-supplied scratch buffers).
3127    * @param[in]       *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
3128    * @param[in]       srcALen length of the first input sequence.
3129    * @param[in]       *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
3130    * @param[in]       srcBLen length of the second input sequence.
3131    * @param[out]      *pDst points to the block of output data.
3132    * @param[in]       firstIndex is the first output sample to start with.
3133    * @param[in]       numPoints is the number of output points to be computed.
3134    * @param[in]       *pScratch1 points to scratch buffer (q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3135    * @param[in]       *pScratch2 points to scratch buffer (q15_t) of size min(srcALen, srcBLen).
3136    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3137    */
3138 
3139   arm_status arm_conv_partial_opt_q15(
3140   q15_t * pSrcA,
3141   uint32_t srcALen,
3142   q15_t * pSrcB,
3143   uint32_t srcBLen,
3144   q15_t * pDst,
3145   uint32_t firstIndex,
3146   uint32_t numPoints,
3147   q15_t * pScratch1,
3148   q15_t * pScratch2);
3149 
3150 
3151 /**
3152    * @brief Partial convolution of Q15 sequences.
3153    * @param[in]       *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
3154    * @param[in]       srcALen length of the first input sequence.
3155    * @param[in]       *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
3156    * @param[in]       srcBLen length of the second input sequence.
3157    * @param[out]      *pDst points to the block of output data.
3158    * @param[in]       firstIndex is the first output sample to start with.
3159    * @param[in]       numPoints is the number of output points to be computed.
3160    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3161    */
3162 
3163   arm_status arm_conv_partial_q15(
3164   q15_t * pSrcA,
3165   uint32_t srcALen,
3166   q15_t * pSrcB,
3167   uint32_t srcBLen,
3168   q15_t * pDst,
3169   uint32_t firstIndex,
3170   uint32_t numPoints);
3171 
3172   /**
3173    * @brief Partial convolution of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
3174    * @param[in]       *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
3175    * @param[in]       srcALen length of the first input sequence.
3176    * @param[in]       *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
3177    * @param[in]       srcBLen length of the second input sequence.
3178    * @param[out]      *pDst points to the block of output data.
3179    * @param[in]       firstIndex is the first output sample to start with.
3180    * @param[in]       numPoints is the number of output points to be computed.
3181    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3182    */
3183 
3184   arm_status arm_conv_partial_fast_q15(
3185 				        q15_t * pSrcA,
3186 				       uint32_t srcALen,
3187 				        q15_t * pSrcB,
3188 				       uint32_t srcBLen,
3189 				       q15_t * pDst,
3190 				       uint32_t firstIndex,
3191 				       uint32_t numPoints);
3192 
3193 
3194   /**
3195    * @brief Partial convolution of Q15 sequences (fast version with caller-supplied scratch buffers) for Cortex-M3 and Cortex-M4.
3196    * @param[in]       *pSrcA points to the first input sequence (q15_t samples; not const-qualified in this prototype).
3197    * @param[in]       srcALen length of the first input sequence.
3198    * @param[in]       *pSrcB points to the second input sequence (q15_t samples; not const-qualified in this prototype).
3199    * @param[in]       srcBLen length of the second input sequence.
3200    * @param[out]      *pDst points to the block of output data.
3201    * @param[in]       firstIndex is the first output sample to start with.
3202    * @param[in]       numPoints is the number of output points to be computed.
3203    * @param[in]       *pScratch1 points to scratch buffer (q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3204    * @param[in]       *pScratch2 points to scratch buffer (q15_t) of size min(srcALen, srcBLen).
3205    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3206    */
3207 
3208   arm_status arm_conv_partial_fast_opt_q15(
3209   q15_t * pSrcA,
3210   uint32_t srcALen,
3211   q15_t * pSrcB,
3212   uint32_t srcBLen,
3213   q15_t * pDst,
3214   uint32_t firstIndex,
3215   uint32_t numPoints,
3216   q15_t * pScratch1,
3217   q15_t * pScratch2);
3218 
3219 
3220   /**
3221    * @brief Partial convolution of Q31 sequences.
3222    * @param[in]       *pSrcA points to the first input sequence (q31_t samples; not const-qualified in this prototype).
3223    * @param[in]       srcALen length of the first input sequence.
3224    * @param[in]       *pSrcB points to the second input sequence (q31_t samples; not const-qualified in this prototype).
3225    * @param[in]       srcBLen length of the second input sequence.
3226    * @param[out]      *pDst points to the block of output data.
3227    * @param[in]       firstIndex is the first output sample to start with.
3228    * @param[in]       numPoints is the number of output points to be computed.
3229    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3230    */
3231 
3232   arm_status arm_conv_partial_q31(
3233   q31_t * pSrcA,
3234   uint32_t srcALen,
3235   q31_t * pSrcB,
3236   uint32_t srcBLen,
3237   q31_t * pDst,
3238   uint32_t firstIndex,
3239   uint32_t numPoints);
3240 
3241 
3242   /**
3243    * @brief Partial convolution of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
3244    * @param[in]       *pSrcA points to the first input sequence (q31_t samples; not const-qualified in this prototype).
3245    * @param[in]       srcALen length of the first input sequence.
3246    * @param[in]       *pSrcB points to the second input sequence (q31_t samples; not const-qualified in this prototype).
3247    * @param[in]       srcBLen length of the second input sequence.
3248    * @param[out]      *pDst points to the block of output data.
3249    * @param[in]       firstIndex is the first output sample to start with.
3250    * @param[in]       numPoints is the number of output points to be computed.
3251    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3252    */
3253 
3254   arm_status arm_conv_partial_fast_q31(
3255   q31_t * pSrcA,
3256   uint32_t srcALen,
3257   q31_t * pSrcB,
3258   uint32_t srcBLen,
3259   q31_t * pDst,
3260   uint32_t firstIndex,
3261   uint32_t numPoints);
3262 
3263 
3264   /**
3265    * @brief Partial convolution of Q7 sequences (variant that takes caller-supplied scratch buffers).
3266    * @param[in]       *pSrcA points to the first input sequence (q7_t samples; not const-qualified in this prototype).
3267    * @param[in]       srcALen length of the first input sequence.
3268    * @param[in]       *pSrcB points to the second input sequence (q7_t samples; not const-qualified in this prototype).
3269    * @param[in]       srcBLen length of the second input sequence.
3270    * @param[out]      *pDst points to the block of output data.
3271    * @param[in]       firstIndex is the first output sample to start with.
3272    * @param[in]       numPoints is the number of output points to be computed.
3273    * @param[in]       *pScratch1 points to scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
3274    * @param[in]       *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
3275    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3276    */
3277 
3278   arm_status arm_conv_partial_opt_q7(
3279   q7_t * pSrcA,
3280   uint32_t srcALen,
3281   q7_t * pSrcB,
3282   uint32_t srcBLen,
3283   q7_t * pDst,
3284   uint32_t firstIndex,
3285   uint32_t numPoints,
3286   q15_t * pScratch1,
3287   q15_t * pScratch2);
3288 
3289 
3290 /**
3291    * @brief Partial convolution of Q7 sequences.
3292    * @param[in]       *pSrcA points to the first input sequence (q7_t samples; not const-qualified in this prototype).
3293    * @param[in]       srcALen length of the first input sequence.
3294    * @param[in]       *pSrcB points to the second input sequence (q7_t samples; not const-qualified in this prototype).
3295    * @param[in]       srcBLen length of the second input sequence.
3296    * @param[out]      *pDst points to the block of output data.
3297    * @param[in]       firstIndex is the first output sample to start with.
3298    * @param[in]       numPoints is the number of output points to be computed.
3299    * @return  Returns either ARM_MATH_SUCCESS if the function completed correctly or ARM_MATH_ARGUMENT_ERROR if the requested subset is not in the range [0, srcALen+srcBLen-2].
3300    */
3301 
3302   arm_status arm_conv_partial_q7(
3303   q7_t * pSrcA,
3304   uint32_t srcALen,
3305   q7_t * pSrcB,
3306   uint32_t srcBLen,
3307   q7_t * pDst,
3308   uint32_t firstIndex,
3309   uint32_t numPoints);
3310 
3311 
3312 
3313   /**
3314    * @brief Instance structure for the Q15 FIR decimator.
         *
         * See arm_fir_decimate_init_q15(), which takes matching numTaps, M,
         * pCoeffs and pState arguments.
3315    */
3316 
3317   typedef struct
3318   {
3319     uint8_t M;                      /**< decimation factor. */
3320     uint16_t numTaps;               /**< number of coefficients in the filter. */
3321     q15_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
3322     q15_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3323   } arm_fir_decimate_instance_q15;
3324 
3325   /**
3326    * @brief Instance structure for the Q31 FIR decimator.
         *
         * See arm_fir_decimate_init_q31(), which takes matching numTaps, M,
         * pCoeffs and pState arguments.
3327    */
3328 
3329   typedef struct
3330   {
3331     uint8_t M;                  /**< decimation factor. */
3332     uint16_t numTaps;           /**< number of coefficients in the filter. */
3333     q31_t *pCoeffs;              /**< points to the coefficient array. The array is of length numTaps. */
3334     q31_t *pState;               /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3335 
3336   } arm_fir_decimate_instance_q31;
3337 
3338   /**
3339    * @brief Instance structure for the floating-point FIR decimator.
         *
         * See arm_fir_decimate_init_f32(), which takes matching numTaps, M,
         * pCoeffs and pState arguments.
3340    */
3341 
3342   typedef struct
3343   {
3344     uint8_t M;                          /**< decimation factor. */
3345     uint16_t numTaps;                   /**< number of coefficients in the filter. */
3346     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numTaps. */
3347     float32_t *pState;                   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
3348 
3349   } arm_fir_decimate_instance_f32;
3350 
3351 
3352 
3353   /**
3354    * @brief Processing function for the floating-point FIR decimator.
3355    * @param[in] *S points to an instance of the floating-point FIR decimator structure (const-qualified here).
3356    * @param[in] *pSrc points to the block of input data (float32_t samples; not const-qualified in this prototype).
3357    * @param[out] *pDst points to the block of output data.
3358    * @param[in] blockSize number of input samples to process per call.
3359    * @return none
3360    */
3361 
3362   void arm_fir_decimate_f32(
3363   const arm_fir_decimate_instance_f32 * S,
3364   float32_t * pSrc,
3365   float32_t * pDst,
3366   uint32_t blockSize);
3367 
3368 
3369   /**
3370    * @brief  Initialization function for the floating-point FIR decimator.
3371    * @param[in,out] *S points to an instance of the floating-point FIR decimator structure.
3372    * @param[in] numTaps  number of coefficients in the filter.
3373    * @param[in] M  decimation factor.
3374    * @param[in] *pCoeffs points to the filter coefficients (length numTaps per the instance structure documentation).
3375    * @param[in] *pState points to the state buffer (length numTaps+blockSize-1 per the instance structure documentation).
3376    * @param[in] blockSize number of input samples to process per call.
3377    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3378    * <code>blockSize</code> is not a multiple of <code>M</code>.
3379    */
3380 
3381   arm_status arm_fir_decimate_init_f32(
3382   arm_fir_decimate_instance_f32 * S,
3383   uint16_t numTaps,
3384   uint8_t M,
3385   float32_t * pCoeffs,
3386   float32_t * pState,
3387   uint32_t blockSize);
3388 
3389   /**
3390    * @brief Processing function for the Q15 FIR decimator.
3391    * @param[in] *S points to an instance of the Q15 FIR decimator structure (const-qualified here).
3392    * @param[in] *pSrc points to the block of input data (q15_t samples; not const-qualified in this prototype).
3393    * @param[out] *pDst points to the block of output data.
3394    * @param[in] blockSize number of input samples to process per call.
3395    * @return none
3396    */
3397 
3398   void arm_fir_decimate_q15(
3399   const arm_fir_decimate_instance_q15 * S,
3400   q15_t * pSrc,
3401   q15_t * pDst,
3402   uint32_t blockSize);
3403 
3404   /**
3405    * @brief Processing function for the Q15 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3406    * @param[in] *S points to an instance of the Q15 FIR decimator structure (const-qualified here).
3407    * @param[in] *pSrc points to the block of input data (q15_t samples; not const-qualified in this prototype).
3408    * @param[out] *pDst points to the block of output data.
3409    * @param[in] blockSize number of input samples to process per call.
3410    * @return none
3411    */
3412 
3413   void arm_fir_decimate_fast_q15(
3414   const arm_fir_decimate_instance_q15 * S,
3415   q15_t * pSrc,
3416   q15_t * pDst,
3417   uint32_t blockSize);
3418 
3419 
3420 
3421   /**
3422    * @brief  Initialization function for the Q15 FIR decimator.
3423    * @param[in,out] *S points to an instance of the Q15 FIR decimator structure.
3424    * @param[in] numTaps  number of coefficients in the filter.
3425    * @param[in] M  decimation factor.
3426    * @param[in] *pCoeffs points to the filter coefficients (length numTaps per the instance structure documentation).
3427    * @param[in] *pState points to the state buffer (length numTaps+blockSize-1 per the instance structure documentation).
3428    * @param[in] blockSize number of input samples to process per call.
3429    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3430    * <code>blockSize</code> is not a multiple of <code>M</code>.
3431    */
3432 
3433   arm_status arm_fir_decimate_init_q15(
3434   arm_fir_decimate_instance_q15 * S,
3435   uint16_t numTaps,
3436   uint8_t M,
3437   q15_t * pCoeffs,
3438   q15_t * pState,
3439   uint32_t blockSize);
3440 
3441   /**
3442    * @brief Processing function for the Q31 FIR decimator.
3443    * @param[in] *S points to an instance of the Q31 FIR decimator structure (const-qualified here).
3444    * @param[in] *pSrc points to the block of input data (q31_t samples; not const-qualified in this prototype).
3445    * @param[out] *pDst points to the block of output data.
3446    * @param[in] blockSize number of input samples to process per call.
3447    * @return none
3448    */
3449 
3450   void arm_fir_decimate_q31(
3451   const arm_fir_decimate_instance_q31 * S,
3452   q31_t * pSrc,
3453   q31_t * pDst,
3454   uint32_t blockSize);
3455 
3456   /**
3457    * @brief Processing function for the Q31 FIR decimator (fast variant) for Cortex-M3 and Cortex-M4.
3458    * @param[in] *S points to an instance of the Q31 FIR decimator structure.
3459    * @param[in] *pSrc points to the block of input data (q31_t samples; not const-qualified in this prototype).
3460    * @param[out] *pDst points to the block of output data.
3461    * @param[in] blockSize number of input samples to process per call.
3462    * @return none
         *
         * NOTE(review): unlike arm_fir_decimate_q31() and arm_fir_decimate_fast_q15(),
         * S is not const-qualified in this prototype although it is documented as
         * [in]. Confirm against the function definition before adding const, since
         * the declaration and definition must agree.
3463    */
3464 
3465   void arm_fir_decimate_fast_q31(
3466   arm_fir_decimate_instance_q31 * S,
3467   q31_t * pSrc,
3468   q31_t * pDst,
3469   uint32_t blockSize);
3470 
3471 
3472   /**
3473    * @brief  Initialization function for the Q31 FIR decimator.
3474    * @param[in,out] *S points to an instance of the Q31 FIR decimator structure.
3475    * @param[in] numTaps  number of coefficients in the filter.
3476    * @param[in] M  decimation factor.
3477    * @param[in] *pCoeffs points to the filter coefficients (length numTaps per the instance structure documentation).
3478    * @param[in] *pState points to the state buffer (length numTaps+blockSize-1 per the instance structure documentation).
3479    * @param[in] blockSize number of input samples to process per call.
3480    * @return    The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3481    * <code>blockSize</code> is not a multiple of <code>M</code>.
3482    */
3483 
3484   arm_status arm_fir_decimate_init_q31(
3485   arm_fir_decimate_instance_q31 * S,
3486   uint16_t numTaps,
3487   uint8_t M,
3488   q31_t * pCoeffs,
3489   q31_t * pState,
3490   uint32_t blockSize);
3491 
3492 
3493 
3494   /**
3495    * @brief Instance structure for the Q15 FIR interpolator (see arm_fir_interpolate_init_q15()).
3496    */
3497 
3498   typedef struct
3499   {
3500     uint8_t L;                      /**< upsample (interpolation) factor. */
3501     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3502     q15_t *pCoeffs;                 /**< points to the coefficient array. The array is of length L*phaseLength. */
3503     q15_t *pState;                  /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3504   } arm_fir_interpolate_instance_q15;
3505 
3506   /**
3507    * @brief Instance structure for the Q31 FIR interpolator (see arm_fir_interpolate_init_q31()).
3508    */
3509 
3510   typedef struct
3511   {
3512     uint8_t L;                      /**< upsample (interpolation) factor. */
3513     uint16_t phaseLength;           /**< length of each polyphase filter component. */
3514     q31_t *pCoeffs;                  /**< points to the coefficient array. The array is of length L*phaseLength. */
3515     q31_t *pState;                   /**< points to the state variable array. The array is of length blockSize+phaseLength-1. */
3516   } arm_fir_interpolate_instance_q31;
3517 
3518   /**
3519    * @brief Instance structure for the floating-point FIR interpolator (see arm_fir_interpolate_init_f32()).
3520    */
3521 
3522   typedef struct
3523   {
3524     uint8_t L;                     /**< upsample (interpolation) factor. */
3525     uint16_t phaseLength;          /**< length of each polyphase filter component. */
3526     float32_t *pCoeffs;             /**< points to the coefficient array. The array is of length L*phaseLength. */
3527     float32_t *pState;              /**< points to the state variable array. The array is of length phaseLength+numTaps-1. NOTE(review): the Q15/Q31 instance structures document blockSize+phaseLength-1 here -- confirm which is intended. */
3528   } arm_fir_interpolate_instance_f32;
3529 
3530 
3531   /**
3532    * @brief Processing function for the Q15 FIR interpolator.
3533    * @param[in] *S        points to an instance of the Q15 FIR interpolator structure (passed as pointer to const).
3534    * @param[in] *pSrc     points to the block of input data.
3535    * @param[out] *pDst    points to the block of output data.
3536    * @param[in] blockSize number of input samples to process per call.
3537    * @return none.
3538    */
3539 
3540   void arm_fir_interpolate_q15(
3541   const arm_fir_interpolate_instance_q15 * S,
3542   q15_t * pSrc,
3543   q15_t * pDst,
3544   uint32_t blockSize);
3545 
3546 
3547   /**
3548    * @brief  Initialization function for the Q15 FIR interpolator.
3549    * @param[in,out] *S        points to an instance of the Q15 FIR interpolator structure.
3550    * @param[in]     L         upsample factor.
3551    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3552    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3553    * @param[in]     *pState   points to the state buffer (see arm_fir_interpolate_instance_q15 for the documented length).
3554    * @param[in]     blockSize number of input samples to process per call.
3555    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3556    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3557    */
3558 
3559   arm_status arm_fir_interpolate_init_q15(
3560   arm_fir_interpolate_instance_q15 * S,
3561   uint8_t L,
3562   uint16_t numTaps,
3563   q15_t * pCoeffs,
3564   q15_t * pState,
3565   uint32_t blockSize);
3566 
3567   /**
3568    * @brief Processing function for the Q31 FIR interpolator.
3569    * @param[in] *S        points to an instance of the Q31 FIR interpolator structure (passed as pointer to const).
3570    * @param[in] *pSrc     points to the block of input data.
3571    * @param[out] *pDst    points to the block of output data.
3572    * @param[in] blockSize number of input samples to process per call.
3573    * @return none.
3574    */
3575 
3576   void arm_fir_interpolate_q31(
3577   const arm_fir_interpolate_instance_q31 * S,
3578   q31_t * pSrc,
3579   q31_t * pDst,
3580   uint32_t blockSize);
3581 
3582   /**
3583    * @brief  Initialization function for the Q31 FIR interpolator.
3584    * @param[in,out] *S        points to an instance of the Q31 FIR interpolator structure.
3585    * @param[in]     L         upsample factor.
3586    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3587    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3588    * @param[in]     *pState   points to the state buffer (see arm_fir_interpolate_instance_q31 for the documented length).
3589    * @param[in]     blockSize number of input samples to process per call.
3590    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3591    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3592    */
3593 
3594   arm_status arm_fir_interpolate_init_q31(
3595   arm_fir_interpolate_instance_q31 * S,
3596   uint8_t L,
3597   uint16_t numTaps,
3598   q31_t * pCoeffs,
3599   q31_t * pState,
3600   uint32_t blockSize);
3601 
3602 
3603   /**
3604    * @brief Processing function for the floating-point FIR interpolator.
3605    * @param[in] *S        points to an instance of the floating-point FIR interpolator structure (passed as pointer to const).
3606    * @param[in] *pSrc     points to the block of input data.
3607    * @param[out] *pDst    points to the block of output data.
3608    * @param[in] blockSize number of input samples to process per call.
3609    * @return none.
3610    */
3611 
3612   void arm_fir_interpolate_f32(
3613   const arm_fir_interpolate_instance_f32 * S,
3614   float32_t * pSrc,
3615   float32_t * pDst,
3616   uint32_t blockSize);
3617 
3618   /**
3619    * @brief  Initialization function for the floating-point FIR interpolator.
3620    * @param[in,out] *S        points to an instance of the floating-point FIR interpolator structure.
3621    * @param[in]     L         upsample factor.
3622    * @param[in]     numTaps   number of filter coefficients in the filter; must be a multiple of <code>L</code>.
3623    * @param[in]     *pCoeffs  points to the filter coefficient buffer.
3624    * @param[in]     *pState   points to the state buffer.
3625    * @param[in]     blockSize number of input samples to process per call.
3626    * @return        The function returns ARM_MATH_SUCCESS if initialization is successful or ARM_MATH_LENGTH_ERROR if
3627    * the filter length <code>numTaps</code> is not a multiple of the interpolation factor <code>L</code>.
3628    */
3629 
3630   arm_status arm_fir_interpolate_init_f32(
3631   arm_fir_interpolate_instance_f32 * S,
3632   uint8_t L,
3633   uint16_t numTaps,
3634   float32_t * pCoeffs,
3635   float32_t * pState,
3636   uint32_t blockSize);
3637 
3638   /**
3639    * @brief Instance structure for the high precision Q31 Biquad cascade filter (see arm_biquad_cas_df1_32x64_init_q31()).
3640    */
3641 
3642   typedef struct
3643   {
3644     uint8_t numStages;       /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3645     q63_t *pState;           /**< points to the array of q63_t state variables.  The array is of length 4*numStages. */
3646     q31_t *pCoeffs;          /**< points to the array of q31_t coefficients.  The array is of length 5*numStages. */
3647     uint8_t postShift;       /**< additional shift, in bits, applied to each output sample. */
3648 
3649   } arm_biquad_cas_df1_32x64_ins_q31;
3650 
3651 
3652   /** @brief Processing function for the high precision Q31 Biquad cascade filter.
3653    * @param[in]  *S        points to an instance of the high precision Q31 Biquad cascade filter structure.
3654    * @param[in]  *pSrc     points to the block of input data.
3655    * @param[out] *pDst     points to the block of output data.
3656    * @param[in]  blockSize number of samples to process.
3657    * @return none.
3658    */
3659 
3660   void arm_biquad_cas_df1_32x64_q31(
3661   const arm_biquad_cas_df1_32x64_ins_q31 * S,
3662   q31_t * pSrc,
3663   q31_t * pDst,
3664   uint32_t blockSize);
3665 
3666 
3667   /** @brief Initialization function for the high precision Q31 Biquad cascade filter.
3668    * @param[in,out] *S           points to an instance of the high precision Q31 Biquad cascade filter structure.
3669    * @param[in]     numStages    number of 2nd order stages in the filter.
3670    * @param[in]     *pCoeffs     points to the filter coefficients (documented on the instance structure as 5*numStages values).
3671    * @param[in]     *pState      points to the state buffer (documented on the instance structure as 4*numStages values).
3672    * @param[in]     postShift    shift to be applied to the output. Varies according to the coefficients format.
3673    * @return        none
3674    */
3675 
3676   void arm_biquad_cas_df1_32x64_init_q31(
3677   arm_biquad_cas_df1_32x64_ins_q31 * S,
3678   uint8_t numStages,
3679   q31_t * pCoeffs,
3680   q63_t * pState,
3681   uint8_t postShift);
3682 
3683 
3684 
3685   /**
3686    * @brief Instance structure for the floating-point transposed direct form II Biquad cascade filter (see arm_biquad_cascade_df2T_init_f32()).
3687    */
3688 
3689   typedef struct
3690   {
3691     uint8_t numStages;         /**< number of 2nd order stages in the filter.  Overall order is 2*numStages. */
3692     float32_t *pState;         /**< points to the array of state variables.  The array is of length 2*numStages. */
3693     float32_t *pCoeffs;        /**< points to the array of coefficients.  The array is of length 5*numStages. */
3694   } arm_biquad_cascade_df2T_instance_f32;
3695 
3696 
3697   /**
3698    * @brief Processing function for the floating-point transposed direct form II Biquad cascade filter.
3699    * @param[in]  *S        points to an instance of the filter data structure (passed as pointer to const).
3700    * @param[in]  *pSrc     points to the block of input data.
3701    * @param[out] *pDst     points to the block of output data.
3702    * @param[in]  blockSize number of samples to process.
3703    * @return none.
3704    */
3705 
3706   void arm_biquad_cascade_df2T_f32(
3707   const arm_biquad_cascade_df2T_instance_f32 * S,
3708   float32_t * pSrc,
3709   float32_t * pDst,
3710   uint32_t blockSize);
3711 
3712 
3713   /**
3714    * @brief  Initialization function for the floating-point transposed direct form II Biquad cascade filter.
3715    * @param[in,out] *S           points to an instance of the filter data structure.
3716    * @param[in]     numStages    number of 2nd order stages in the filter.
3717    * @param[in]     *pCoeffs     points to the filter coefficients (documented on the instance structure as 5*numStages values).
3718    * @param[in]     *pState      points to the state buffer (documented on the instance structure as 2*numStages values).
3719    * @return        none
3720    */
3721 
3722   void arm_biquad_cascade_df2T_init_f32(
3723   arm_biquad_cascade_df2T_instance_f32 * S,
3724   uint8_t numStages,
3725   float32_t * pCoeffs,
3726   float32_t * pState);
3727 
3728 
3729 
3730   /**
3731    * @brief Instance structure for the Q15 FIR lattice filter (see arm_fir_lattice_init_q15()).
3732    */
3733 
3734   typedef struct
3735   {
3736     uint16_t numStages;                          /**< number of filter stages. */
3737     q15_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3738     q15_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3739   } arm_fir_lattice_instance_q15;
3740 
3741   /**
3742    * @brief Instance structure for the Q31 FIR lattice filter (see arm_fir_lattice_init_q31()).
3743    */
3744 
3745   typedef struct
3746   {
3747     uint16_t numStages;                          /**< number of filter stages. */
3748     q31_t *pState;                               /**< points to the state variable array. The array is of length numStages. */
3749     q31_t *pCoeffs;                              /**< points to the coefficient array. The array is of length numStages. */
3750   } arm_fir_lattice_instance_q31;
3751 
3752   /**
3753    * @brief Instance structure for the floating-point FIR lattice filter (see arm_fir_lattice_init_f32()).
3754    */
3755 
3756   typedef struct
3757   {
3758     uint16_t numStages;                  /**< number of filter stages. */
3759     float32_t *pState;                   /**< points to the state variable array. The array is of length numStages. */
3760     float32_t *pCoeffs;                  /**< points to the coefficient array. The array is of length numStages. */
3761   } arm_fir_lattice_instance_f32;
3762 
3763   /**
3764    * @brief Initialization function for the Q15 FIR lattice filter.
3765    * @param[in,out] *S points to the instance of the Q15 FIR lattice structure to initialize.
3766    * @param[in] numStages  number of filter stages.
3767    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3768    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3769    * @return none.
3770    */
3771 
3772   void arm_fir_lattice_init_q15(
3773   arm_fir_lattice_instance_q15 * S,
3774   uint16_t numStages,
3775   q15_t * pCoeffs,
3776   q15_t * pState);
3777 
3778 
3779   /**
3780    * @brief Processing function for the Q15 FIR lattice filter.
3781    * @param[in] *S points to an instance of the Q15 FIR lattice structure (passed as pointer to const).
3782    * @param[in] *pSrc points to the block of input data.
3783    * @param[out] *pDst points to the block of output data.
3784    * @param[in] blockSize number of samples to process.
3785    * @return none.
3786    */
3787   void arm_fir_lattice_q15(
3788   const arm_fir_lattice_instance_q15 * S,
3789   q15_t * pSrc,
3790   q15_t * pDst,
3791   uint32_t blockSize);
3792 
3793   /**
3794    * @brief Initialization function for the Q31 FIR lattice filter.
3795    * @param[in,out] *S points to the instance of the Q31 FIR lattice structure to initialize.
3796    * @param[in] numStages  number of filter stages.
3797    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3798    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3799    * @return none.
3800    */
3801 
3802   void arm_fir_lattice_init_q31(
3803   arm_fir_lattice_instance_q31 * S,
3804   uint16_t numStages,
3805   q31_t * pCoeffs,
3806   q31_t * pState);
3807 
3808 
3809   /**
3810    * @brief Processing function for the Q31 FIR lattice filter.
3811    * @param[in]  *S        points to an instance of the Q31 FIR lattice structure (passed as pointer to const).
3812    * @param[in]  *pSrc     points to the block of input data.
3813    * @param[out] *pDst     points to the block of output data.
3814    * @param[in]  blockSize number of samples to process.
3815    * @return none.
3816    */
3817 
3818   void arm_fir_lattice_q31(
3819   const arm_fir_lattice_instance_q31 * S,
3820   q31_t * pSrc,
3821   q31_t * pDst,
3822   uint32_t blockSize);
3823 
3824   /**
3825    * @brief Initialization function for the floating-point FIR lattice filter.
3826    * @param[in,out] *S points to the instance of the floating-point FIR lattice structure to initialize.
3827    * @param[in] numStages  number of filter stages.
3828    * @param[in] *pCoeffs points to the coefficient buffer.  The array is of length numStages.
3829    * @param[in] *pState points to the state buffer.  The array is of length numStages.
3830    * @return none.
3831    */
3832 
3833   void arm_fir_lattice_init_f32(
3834   arm_fir_lattice_instance_f32 * S,
3835   uint16_t numStages,
3836   float32_t * pCoeffs,
3837   float32_t * pState);
3838 
3839   /**
3840    * @brief Processing function for the floating-point FIR lattice filter.
3841    * @param[in]  *S        points to an instance of the floating-point FIR lattice structure (passed as pointer to const).
3842    * @param[in]  *pSrc     points to the block of input data.
3843    * @param[out] *pDst     points to the block of output data.
3844    * @param[in]  blockSize number of samples to process.
3845    * @return none.
3846    */
3847 
3848   void arm_fir_lattice_f32(
3849   const arm_fir_lattice_instance_f32 * S,
3850   float32_t * pSrc,
3851   float32_t * pDst,
3852   uint32_t blockSize);
3853 
3854   /**
3855    * @brief Instance structure for the Q15 IIR lattice filter (see arm_iir_lattice_init_q15()).
3856    */
3857   typedef struct
3858   {
3859     uint16_t numStages;                         /**< number of stages in the filter. */
3860     q15_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3861     q15_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
3862     q15_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
3863   } arm_iir_lattice_instance_q15;
3864 
3865   /**
3866    * @brief Instance structure for the Q31 IIR lattice filter (see arm_iir_lattice_init_q31()).
3867    */
3868   typedef struct
3869   {
3870     uint16_t numStages;                         /**< number of stages in the filter. */
3871     q31_t *pState;                              /**< points to the state variable array. The array is of length numStages+blockSize. */
3872     q31_t *pkCoeffs;                            /**< points to the reflection coefficient array. The array is of length numStages. */
3873     q31_t *pvCoeffs;                            /**< points to the ladder coefficient array. The array is of length numStages+1. */
3874   } arm_iir_lattice_instance_q31;
3875 
3876   /**
3877    * @brief Instance structure for the floating-point IIR lattice filter (see arm_iir_lattice_init_f32()).
3878    */
3879   typedef struct
3880   {
3881     uint16_t numStages;                         /**< number of stages in the filter. */
3882     float32_t *pState;                          /**< points to the state variable array. The array is of length numStages+blockSize. */
3883     float32_t *pkCoeffs;                        /**< points to the reflection coefficient array. The array is of length numStages. */
3884     float32_t *pvCoeffs;                        /**< points to the ladder coefficient array. The array is of length numStages+1. */
3885   } arm_iir_lattice_instance_f32;
3886 
3887   /**
3888    * @brief Processing function for the floating-point IIR lattice filter.
3889    * @param[in] *S points to an instance of the floating-point IIR lattice structure (passed as pointer to const).
3890    * @param[in] *pSrc points to the block of input data.
3891    * @param[out] *pDst points to the block of output data.
3892    * @param[in] blockSize number of samples to process.
3893    * @return none.
3894    */
3895 
3896   void arm_iir_lattice_f32(
3897   const arm_iir_lattice_instance_f32 * S,
3898   float32_t * pSrc,
3899   float32_t * pDst,
3900   uint32_t blockSize);
3901 
3902   /**
3903    * @brief Initialization function for the floating-point IIR lattice filter.
3904    * @param[in,out] *S points to the instance of the floating-point IIR lattice structure to initialize.
3905    * @param[in] numStages number of stages in the filter.
3906    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3907    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3908    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize (as documented on arm_iir_lattice_instance_f32).
3909    * @param[in] blockSize number of samples to process.
3910    * @return none.
3911    */
3912 
3913   void arm_iir_lattice_init_f32(
3914   arm_iir_lattice_instance_f32 * S,
3915   uint16_t numStages,
3916   float32_t * pkCoeffs,
3917   float32_t * pvCoeffs,
3918   float32_t * pState,
3919   uint32_t blockSize);
3920 
3921 
3922   /**
3923    * @brief Processing function for the Q31 IIR lattice filter.
3924    * @param[in] *S points to an instance of the Q31 IIR lattice structure (passed as pointer to const).
3925    * @param[in] *pSrc points to the block of input data.
3926    * @param[out] *pDst points to the block of output data.
3927    * @param[in] blockSize number of samples to process.
3928    * @return none.
3929    */
3930 
3931   void arm_iir_lattice_q31(
3932   const arm_iir_lattice_instance_q31 * S,
3933   q31_t * pSrc,
3934   q31_t * pDst,
3935   uint32_t blockSize);
3936 
3937 
3938   /**
3939    * @brief Initialization function for the Q31 IIR lattice filter.
3940    * @param[in,out] *S points to the instance of the Q31 IIR lattice structure to initialize.
3941    * @param[in] numStages number of stages in the filter.
3942    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3943    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3944    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3945    * @param[in] blockSize number of samples to process.
3946    * @return none.
3947    */
3948 
3949   void arm_iir_lattice_init_q31(
3950   arm_iir_lattice_instance_q31 * S,
3951   uint16_t numStages,
3952   q31_t * pkCoeffs,
3953   q31_t * pvCoeffs,
3954   q31_t * pState,
3955   uint32_t blockSize);
3956 
3957 
3958   /**
3959    * @brief Processing function for the Q15 IIR lattice filter.
3960    * @param[in] *S points to an instance of the Q15 IIR lattice structure (passed as pointer to const).
3961    * @param[in] *pSrc points to the block of input data.
3962    * @param[out] *pDst points to the block of output data.
3963    * @param[in] blockSize number of samples to process.
3964    * @return none.
3965    */
3966 
3967   void arm_iir_lattice_q15(
3968   const arm_iir_lattice_instance_q15 * S,
3969   q15_t * pSrc,
3970   q15_t * pDst,
3971   uint32_t blockSize);
3972 
3973 
3974   /**
3975    * @brief Initialization function for the Q15 IIR lattice filter.
3976    * @param[in,out] *S points to the instance of the fixed-point Q15 IIR lattice structure to initialize.
3977    * @param[in] numStages  number of stages in the filter.
3978    * @param[in] *pkCoeffs points to the reflection coefficient buffer.  The array is of length numStages.
3979    * @param[in] *pvCoeffs points to the ladder coefficient buffer.  The array is of length numStages+1.
3980    * @param[in] *pState points to the state buffer.  The array is of length numStages+blockSize.
3981    * @param[in] blockSize number of samples to process per call.
3982    * @return none.
3983    */
3984 
3985   void arm_iir_lattice_init_q15(
3986   arm_iir_lattice_instance_q15 * S,
3987   uint16_t numStages,
3988   q15_t * pkCoeffs,
3989   q15_t * pvCoeffs,
3990   q15_t * pState,
3991   uint32_t blockSize);
3992 
3993   /**
3994    * @brief Instance structure for the floating-point LMS filter (see arm_lms_init_f32()).
3995    */
3996 
3997   typedef struct
3998   {
3999     uint16_t numTaps;    /**< number of coefficients in the filter. */
4000     float32_t *pState;   /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4001     float32_t *pCoeffs;  /**< points to the coefficient array. The array is of length numTaps. */
4002     float32_t mu;        /**< step size that controls filter coefficient updates. */
4003   } arm_lms_instance_f32;
4004 
4005   /**
4006    * @brief Processing function for floating-point LMS filter.
4007    * @param[in]  *S points to an instance of the floating-point LMS filter structure (pointer to const; the pState/pCoeffs arrays it references are not const-qualified).
4008    * @param[in]  *pSrc points to the block of input data.
4009    * @param[in]  *pRef points to the block of reference data.
4010    * @param[out] *pOut points to the block of output data.
4011    * @param[out] *pErr points to the block of error data.
4012    * @param[in]  blockSize number of samples to process.
4013    * @return     none.
4014    */
4015 
4016   void arm_lms_f32(
4017   const arm_lms_instance_f32 * S,
4018   float32_t * pSrc,
4019   float32_t * pRef,
4020   float32_t * pOut,
4021   float32_t * pErr,
4022   uint32_t blockSize);
4023 
4024   /**
4025    * @brief Initialization function for floating-point LMS filter.
4026    * @param[in,out] *S points to the instance of the floating-point LMS filter structure to initialize.
4027    * @param[in] numTaps  number of filter coefficients.
4028    * @param[in] *pCoeffs points to the coefficient buffer (documented on the instance structure as numTaps values).
4029    * @param[in] *pState points to the state buffer (documented on the instance structure as numTaps+blockSize-1 values).
4030    * @param[in] mu step size that controls filter coefficient updates.
4031    * @param[in] blockSize number of samples to process.
4032    * @return none.
4033    */
4034 
4035   void arm_lms_init_f32(
4036   arm_lms_instance_f32 * S,
4037   uint16_t numTaps,
4038   float32_t * pCoeffs,
4039   float32_t * pState,
4040   float32_t mu,
4041   uint32_t blockSize);
4042 
4043   /**
4044    * @brief Instance structure for the Q15 LMS filter (see arm_lms_init_q15()).
4045    */
4046 
4047   typedef struct
4048   {
4049     uint16_t numTaps;    /**< number of coefficients in the filter. */
4050     q15_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4051     q15_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4052     q15_t mu;            /**< step size that controls filter coefficient updates. */
4053     uint32_t postShift;  /**< bit shift applied to coefficients. */
4054   } arm_lms_instance_q15;
4055 
4056 
4057   /**
4058    * @brief Initialization function for the Q15 LMS filter.
4059    * @param[in,out] *S points to the instance of the Q15 LMS filter structure to initialize.
4060    * @param[in] numTaps  number of filter coefficients.
4061    * @param[in] *pCoeffs points to the coefficient buffer (documented on the instance structure as numTaps values).
4062    * @param[in] *pState points to the state buffer (documented on the instance structure as numTaps+blockSize-1 values).
4063    * @param[in] mu step size that controls filter coefficient updates.
4064    * @param[in] blockSize number of samples to process.
4065    * @param[in] postShift bit shift applied to coefficients.
4066    * @return    none.
4067    */
4068 
4069   void arm_lms_init_q15(
4070   arm_lms_instance_q15 * S,
4071   uint16_t numTaps,
4072   q15_t * pCoeffs,
4073   q15_t * pState,
4074   q15_t mu,
4075   uint32_t blockSize,
4076   uint32_t postShift);
4077 
4078   /**
4079    * @brief Processing function for Q15 LMS filter.
4080    * @param[in] *S points to an instance of the Q15 LMS filter structure (pointer to const; the pState/pCoeffs arrays it references are not const-qualified).
4081    * @param[in] *pSrc points to the block of input data.
4082    * @param[in] *pRef points to the block of reference data.
4083    * @param[out] *pOut points to the block of output data.
4084    * @param[out] *pErr points to the block of error data.
4085    * @param[in] blockSize number of samples to process.
4086    * @return none.
4087    */
4088 
4089   void arm_lms_q15(
4090   const arm_lms_instance_q15 * S,
4091   q15_t * pSrc,
4092   q15_t * pRef,
4093   q15_t * pOut,
4094   q15_t * pErr,
4095   uint32_t blockSize);
4096 
4097 
4098   /**
4099    * @brief Instance structure for the Q31 LMS filter (see arm_lms_init_q31()).
4100    */
4101 
4102   typedef struct
4103   {
4104     uint16_t numTaps;    /**< number of coefficients in the filter. */
4105     q31_t *pState;       /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4106     q31_t *pCoeffs;      /**< points to the coefficient array. The array is of length numTaps. */
4107     q31_t mu;            /**< step size that controls filter coefficient updates. */
4108     uint32_t postShift;  /**< bit shift applied to coefficients. */
4109 
4110   } arm_lms_instance_q31;
4111 
4112   /**
4113    * @brief Processing function for Q31 LMS filter.
4114    * @param[in]  *S points to an instance of the Q31 LMS filter structure.
4115    * @param[in]  *pSrc points to the block of input data.
4116    * @param[in]  *pRef points to the block of reference data.
4117    * @param[out] *pOut points to the block of output data.
4118    * @param[out] *pErr points to the block of error data.
4119    * @param[in]  blockSize number of samples to process.
4120    * @return     none.
4121    */
4122 
4123   void arm_lms_q31(
4124   const arm_lms_instance_q31 * S,
4125   q31_t * pSrc,
4126   q31_t * pRef,
4127   q31_t * pOut,
4128   q31_t * pErr,
4129   uint32_t blockSize);
4130 
4131   /**
4132    * @brief Initialization function for Q31 LMS filter.
4133    * @param[in,out] *S points to the instance of the Q31 LMS filter structure to initialize.
4134    * @param[in] numTaps  number of filter coefficients.
4135    * @param[in] *pCoeffs points to the coefficient buffer (documented on the instance structure as numTaps values).
4136    * @param[in] *pState points to the state buffer (documented on the instance structure as numTaps+blockSize-1 values).
4137    * @param[in] mu step size that controls filter coefficient updates.
4138    * @param[in] blockSize number of samples to process.
4139    * @param[in] postShift bit shift applied to coefficients.
4140    * @return none.
4141    */
4142 
4143   void arm_lms_init_q31(
4144   arm_lms_instance_q31 * S,
4145   uint16_t numTaps,
4146   q31_t * pCoeffs,
4147   q31_t * pState,
4148   q31_t mu,
4149   uint32_t blockSize,
4150   uint32_t postShift);
4151 
4152   /**
4153    * @brief Instance structure for the floating-point normalized LMS filter (see arm_lms_norm_init_f32()).
4154    */
4155 
4156   typedef struct
4157   {
4158     uint16_t numTaps;     /**< number of coefficients in the filter. */
4159     float32_t *pState;    /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4160     float32_t *pCoeffs;   /**< points to the coefficient array. The array is of length numTaps. */
4161     float32_t mu;        /**< step size that controls filter coefficient updates. */
4162     float32_t energy;    /**< saves previous frame energy. */
4163     float32_t x0;        /**< saves previous input sample. */
4164   } arm_lms_norm_instance_f32;
4165 
4166   /**
4167    * @brief Processing function for floating-point normalized LMS filter.
4168    * @param[in,out] *S points to an instance of the floating-point normalized LMS filter structure (not const; the structure holds the saved energy and x0 values).
4169    * @param[in] *pSrc points to the block of input data.
4170    * @param[in] *pRef points to the block of reference data.
4171    * @param[out] *pOut points to the block of output data.
4172    * @param[out] *pErr points to the block of error data.
4173    * @param[in] blockSize number of samples to process.
4174    * @return none.
4175    */
4176 
4177   void arm_lms_norm_f32(
4178   arm_lms_norm_instance_f32 * S,
4179   float32_t * pSrc,
4180   float32_t * pRef,
4181   float32_t * pOut,
4182   float32_t * pErr,
4183   uint32_t blockSize);
4184 
4185   /**
4186    * @brief Initialization function for floating-point normalized LMS filter.
4187    * @param[in,out] *S points to the instance of the floating-point normalized LMS filter structure to initialize.
4188    * @param[in] numTaps  number of filter coefficients.
4189    * @param[in] *pCoeffs points to the coefficient buffer (documented on the instance structure as numTaps values).
4190    * @param[in] *pState points to the state buffer (documented on the instance structure as numTaps+blockSize-1 values).
4191    * @param[in] mu step size that controls filter coefficient updates.
4192    * @param[in] blockSize number of samples to process.
4193    * @return none.
4194    */
4195 
4196   void arm_lms_norm_init_f32(
4197   arm_lms_norm_instance_f32 * S,
4198   uint16_t numTaps,
4199   float32_t * pCoeffs,
4200   float32_t * pState,
4201   float32_t mu,
4202   uint32_t blockSize);
4203 
4204 
4205   /**
4206    * @brief Instance structure for the Q31 normalized LMS filter (see arm_lms_norm_init_q31()).
4207    */
4208   typedef struct
4209   {
4210     uint16_t numTaps;     /**< number of coefficients in the filter. */
4211     q31_t *pState;        /**< points to the state variable array. The array is of length numTaps+blockSize-1. */
4212     q31_t *pCoeffs;       /**< points to the coefficient array. The array is of length numTaps. */
4213     q31_t mu;             /**< step size that controls filter coefficient updates. */
4214     uint8_t postShift;    /**< bit shift applied to coefficients. */
4215     q31_t *recipTable;    /**< points to the reciprocal initial value table. */
4216     q31_t energy;         /**< saves previous frame energy. */
4217     q31_t x0;             /**< saves previous input sample. */
4218   } arm_lms_norm_instance_q31;
4219 
4220   /**
4221    * @brief Processing function for Q31 normalized LMS filter.
4222    * @param[in,out] *S points to an instance of the Q31 normalized LMS filter structure (not const; the structure holds the saved energy and x0 values).
4223    * @param[in] *pSrc points to the block of input data.
4224    * @param[in] *pRef points to the block of reference data.
4225    * @param[out] *pOut points to the block of output data.
4226    * @param[out] *pErr points to the block of error data.
4227    * @param[in] blockSize number of samples to process.
4228    * @return none.
4229    */
4230 
4231   void arm_lms_norm_q31(
4232   arm_lms_norm_instance_q31 * S,
4233   q31_t * pSrc,
4234   q31_t * pRef,
4235   q31_t * pOut,
4236   q31_t * pErr,
4237   uint32_t blockSize);
4238 
4239   /**
4240    * @brief Initialization function for the Q31 normalized LMS filter.
4241    * @param[in] *S points to an instance of the Q31 normalized LMS filter structure.
4242    * @param[in] numTaps number of filter coefficients.
4243    * @param[in] *pCoeffs points to the coefficient buffer (see the instance structure for the documented length).
4244    * @param[in] *pState points to the state buffer (see the instance structure for the documented length).
4245    * @param[in] mu step size that controls filter coefficient updates.
4246    * @param[in] blockSize number of samples to process.
4247    * @param[in] postShift bit shift applied to coefficients.
4248    * @return none.
4249    */
4250 
4251   void arm_lms_norm_init_q31(
4252   arm_lms_norm_instance_q31 * S,
4253   uint16_t numTaps,
4254   q31_t * pCoeffs,
4255   q31_t * pState,
4256   q31_t mu,
4257   uint32_t blockSize,
4258   uint8_t postShift);
4259 
4260   /**
4261    * @brief Instance structure for the Q15 normalized LMS filter (coefficients, state, step size and saved values).
4262    */
4263 
4264   typedef struct
4265   {
4266     uint16_t numTaps;    /**< number of coefficients in the filter. */
4267     q15_t *pState;        /**< points to the state variable array of length numTaps+blockSize-1. */
4268     q15_t *pCoeffs;       /**< points to the coefficient array of length numTaps. */
4269     q15_t mu;            /**< step size that controls filter coefficient updates. */
4270     uint8_t postShift;   /**< bit shift applied to coefficients. */
4271     q15_t *recipTable;   /**< points to the reciprocal initial value table. */
4272     q15_t energy;        /**< saved energy of the previous frame. */
4273     q15_t x0;            /**< saved previous input sample. */
4274   } arm_lms_norm_instance_q15;
4275 
4276   /**
4277    * @brief Processing function for the Q15 normalized LMS filter.
4278    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4279    * @param[in] *pSrc points to the block of input data.
4280    * @param[in] *pRef points to the block of reference data.
4281    * @param[out] *pOut points to the block of output data.
4282    * @param[out] *pErr points to the block of error data.
4283    * @param[in] blockSize number of samples to process per call.
4284    * @return none.
4285    */
4286 
4287   void arm_lms_norm_q15(
4288   arm_lms_norm_instance_q15 * S,
4289   q15_t * pSrc,
4290   q15_t * pRef,
4291   q15_t * pOut,
4292   q15_t * pErr,
4293   uint32_t blockSize);
4294 
4295 
4296   /**
4297    * @brief Initialization function for the Q15 normalized LMS filter.
4298    * @param[in] *S points to an instance of the Q15 normalized LMS filter structure.
4299    * @param[in] numTaps number of filter coefficients.
4300    * @param[in] *pCoeffs points to the coefficient buffer (see the instance structure for the documented length).
4301    * @param[in] *pState points to the state buffer (see the instance structure for the documented length).
4302    * @param[in] mu step size that controls filter coefficient updates.
4303    * @param[in] blockSize number of samples to process.
4304    * @param[in] postShift bit shift applied to coefficients.
4305    * @return none.
4306    */
4307 
4308   void arm_lms_norm_init_q15(
4309   arm_lms_norm_instance_q15 * S,
4310   uint16_t numTaps,
4311   q15_t * pCoeffs,
4312   q15_t * pState,
4313   q15_t mu,
4314   uint32_t blockSize,
4315   uint8_t postShift);
4316 
4317   /**
4318    * @brief Correlation of floating-point sequences.
4319    * @param[in] *pSrcA points to the first input sequence.
4320    * @param[in] srcALen length of the first input sequence.
4321    * @param[in] *pSrcB points to the second input sequence.
4322    * @param[in] srcBLen length of the second input sequence.
4323    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4324    * @return none.
4325    */
4326 
4327   void arm_correlate_f32(
4328   float32_t * pSrcA,
4329   uint32_t srcALen,
4330   float32_t * pSrcB,
4331   uint32_t srcBLen,
4332   float32_t * pDst);
4333 
4334 
4335    /**
4336    * @brief Correlation of Q15 sequences (variant that takes a caller-supplied scratch buffer).
4337    * @param[in] *pSrcA points to the first input sequence.
4338    * @param[in] srcALen length of the first input sequence.
4339    * @param[in] *pSrcB points to the second input sequence.
4340    * @param[in] srcBLen length of the second input sequence.
4341    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4342    * @param[in]  *pScratch points to a scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4343    * @return none.
4344    */
4345   void arm_correlate_opt_q15(
4346   q15_t * pSrcA,
4347   uint32_t srcALen,
4348   q15_t * pSrcB,
4349   uint32_t srcBLen,
4350   q15_t * pDst,
4351   q15_t * pScratch);
4352 
4353 
4354   /**
4355    * @brief Correlation of Q15 sequences.
4356    * @param[in] *pSrcA points to the first input sequence.
4357    * @param[in] srcALen length of the first input sequence.
4358    * @param[in] *pSrcB points to the second input sequence.
4359    * @param[in] srcBLen length of the second input sequence.
4360    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4361    * @return none.
4362    */
4363 
4364   void arm_correlate_q15(
4365   q15_t * pSrcA,
4366   uint32_t srcALen,
4367   q15_t * pSrcB,
4368   uint32_t srcBLen,
4369   q15_t * pDst);
4370 
4371   /**
4372    * @brief Correlation of Q15 sequences (fast version) for Cortex-M3 and Cortex-M4.
4373    * @param[in] *pSrcA points to the first input sequence.
4374    * @param[in] srcALen length of the first input sequence.
4375    * @param[in] *pSrcB points to the second input sequence.
4376    * @param[in] srcBLen length of the second input sequence.
4377    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4378    * @return none.
4379    */
4380 
4381   void arm_correlate_fast_q15(
4382 			       q15_t * pSrcA,
4383 			      uint32_t srcALen,
4384 			       q15_t * pSrcB,
4385 			      uint32_t srcBLen,
4386 			      q15_t * pDst);
4387 
4388 
4389 
4390   /**
4391    * @brief Correlation of Q15 sequences (fast version, with caller-supplied scratch buffer) for Cortex-M3 and Cortex-M4.
4392    * @param[in] *pSrcA points to the first input sequence.
4393    * @param[in] srcALen length of the first input sequence.
4394    * @param[in] *pSrcB points to the second input sequence.
4395    * @param[in] srcBLen length of the second input sequence.
4396    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4397    * @param[in]  *pScratch points to a scratch buffer of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4398    * @return none.
4399    */
4400 
4401   void arm_correlate_fast_opt_q15(
4402   q15_t * pSrcA,
4403   uint32_t srcALen,
4404   q15_t * pSrcB,
4405   uint32_t srcBLen,
4406   q15_t * pDst,
4407   q15_t * pScratch);
4408 
4409   /**
4410    * @brief Correlation of Q31 sequences.
4411    * @param[in] *pSrcA points to the first input sequence.
4412    * @param[in] srcALen length of the first input sequence.
4413    * @param[in] *pSrcB points to the second input sequence.
4414    * @param[in] srcBLen length of the second input sequence.
4415    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4416    * @return none.
4417    */
4418 
4419   void arm_correlate_q31(
4420   q31_t * pSrcA,
4421   uint32_t srcALen,
4422   q31_t * pSrcB,
4423   uint32_t srcBLen,
4424   q31_t * pDst);
4425 
4426   /**
4427    * @brief Correlation of Q31 sequences (fast version) for Cortex-M3 and Cortex-M4.
4428    * @param[in] *pSrcA points to the first input sequence.
4429    * @param[in] srcALen length of the first input sequence.
4430    * @param[in] *pSrcB points to the second input sequence.
4431    * @param[in] srcBLen length of the second input sequence.
4432    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4433    * @return none.
4434    */
4435 
4436   void arm_correlate_fast_q31(
4437   q31_t * pSrcA,
4438   uint32_t srcALen,
4439   q31_t * pSrcB,
4440   uint32_t srcBLen,
4441   q31_t * pDst);
4442 
4443 
4444 
4445  /**
4446    * @brief Correlation of Q7 sequences (variant that takes caller-supplied scratch buffers).
4447    * @param[in] *pSrcA points to the first input sequence.
4448    * @param[in] srcALen length of the first input sequence.
4449    * @param[in] *pSrcB points to the second input sequence.
4450    * @param[in] srcBLen length of the second input sequence.
4451    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4452    * @param[in]  *pScratch1 points to a scratch buffer (of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
4453    * @param[in]  *pScratch2 points to a scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
4454    * @return none.
4455    */
4456 
4457   void arm_correlate_opt_q7(
4458   q7_t * pSrcA,
4459   uint32_t srcALen,
4460   q7_t * pSrcB,
4461   uint32_t srcBLen,
4462   q7_t * pDst,
4463   q15_t * pScratch1,
4464   q15_t * pScratch2);
4465 
4466 
4467   /**
4468    * @brief Correlation of Q7 sequences.
4469    * @param[in] *pSrcA points to the first input sequence.
4470    * @param[in] srcALen length of the first input sequence.
4471    * @param[in] *pSrcB points to the second input sequence.
4472    * @param[in] srcBLen length of the second input sequence.
4473    * @param[out] *pDst points to the output buffer of length 2 * max(srcALen, srcBLen) - 1.
4474    * @return none.
4475    */
4476 
4477   void arm_correlate_q7(
4478   q7_t * pSrcA,
4479   uint32_t srcALen,
4480   q7_t * pSrcB,
4481   uint32_t srcBLen,
4482   q7_t * pDst);
4483 
4484 
4485   /**
4486    * @brief Instance structure for the floating-point sparse FIR filter.
4487    */
4488   typedef struct
4489   {
4490     uint16_t numTaps;             /**< number of coefficients in the filter. */
4491     uint16_t stateIndex;          /**< state buffer index; points to the oldest sample in the state buffer. */
4492     float32_t *pState;            /**< points to the state buffer array of length maxDelay+blockSize-1. */
4493     float32_t *pCoeffs;           /**< points to the coefficient array of length numTaps. */
4494     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4495     int32_t *pTapDelay;           /**< points to the array of delay values, of length numTaps. */
4496   } arm_fir_sparse_instance_f32;
4497 
4498   /**
4499    * @brief Instance structure for the Q31 sparse FIR filter.
4500    */
4501 
4502   typedef struct
4503   {
4504     uint16_t numTaps;             /**< number of coefficients in the filter. */
4505     uint16_t stateIndex;          /**< state buffer index; points to the oldest sample in the state buffer. */
4506     q31_t *pState;                /**< points to the state buffer array of length maxDelay+blockSize-1. */
4507     q31_t *pCoeffs;               /**< points to the coefficient array of length numTaps. */
4508     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4509     int32_t *pTapDelay;           /**< points to the array of delay values, of length numTaps. */
4510   } arm_fir_sparse_instance_q31;
4511 
4512   /**
4513    * @brief Instance structure for the Q15 sparse FIR filter.
4514    */
4515 
4516   typedef struct
4517   {
4518     uint16_t numTaps;             /**< number of coefficients in the filter. */
4519     uint16_t stateIndex;          /**< state buffer index; points to the oldest sample in the state buffer. */
4520     q15_t *pState;                /**< points to the state buffer array of length maxDelay+blockSize-1. */
4521     q15_t *pCoeffs;               /**< points to the coefficient array of length numTaps. */
4522     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4523     int32_t *pTapDelay;           /**< points to the array of delay values, of length numTaps. */
4524   } arm_fir_sparse_instance_q15;
4525 
4526   /**
4527    * @brief Instance structure for the Q7 sparse FIR filter.
4528    */
4529 
4530   typedef struct
4531   {
4532     uint16_t numTaps;             /**< number of coefficients in the filter. */
4533     uint16_t stateIndex;          /**< state buffer index; points to the oldest sample in the state buffer. */
4534     q7_t *pState;                 /**< points to the state buffer array of length maxDelay+blockSize-1. */
4535     q7_t *pCoeffs;                /**< points to the coefficient array of length numTaps. */
4536     uint16_t maxDelay;            /**< maximum offset specified by the pTapDelay array. */
4537     int32_t *pTapDelay;           /**< points to the array of delay values, of length numTaps. */
4538   } arm_fir_sparse_instance_q7;
4539 
4540   /**
4541    * @brief Processing function for the floating-point sparse FIR filter.
4542    * @param[in]  *S          points to an instance of the floating-point sparse FIR structure.
4543    * @param[in]  *pSrc       points to the block of input data.
4544    * @param[out] *pDst       points to the block of output data.
4545    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4546    * @param[in]  blockSize   number of input samples to process per call.
4547    * @return none.
4548    */
4549 
4550   void arm_fir_sparse_f32(
4551   arm_fir_sparse_instance_f32 * S,
4552   float32_t * pSrc,
4553   float32_t * pDst,
4554   float32_t * pScratchIn,
4555   uint32_t blockSize);
4556 
4557   /**
4558    * @brief  Initialization function for the floating-point sparse FIR filter.
4559    * @param[in,out] *S         points to an instance of the floating-point sparse FIR structure.
4560    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4561    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4562    * @param[in]     *pState    points to the state buffer.
4563    * @param[in]     *pTapDelay points to the array of offset times.
4564    * @param[in]     maxDelay   maximum offset time supported.
4565    * @param[in]     blockSize  number of samples that will be processed per block.
4566    * @return none.
4567    */
4568 
4569   void arm_fir_sparse_init_f32(
4570   arm_fir_sparse_instance_f32 * S,
4571   uint16_t numTaps,
4572   float32_t * pCoeffs,
4573   float32_t * pState,
4574   int32_t * pTapDelay,
4575   uint16_t maxDelay,
4576   uint32_t blockSize);
4577 
4578   /**
4579    * @brief Processing function for the Q31 sparse FIR filter.
4580    * @param[in]  *S          points to an instance of the Q31 sparse FIR structure.
4581    * @param[in]  *pSrc       points to the block of input data.
4582    * @param[out] *pDst       points to the block of output data.
4583    * @param[in]  *pScratchIn points to a temporary buffer of size blockSize.
4584    * @param[in]  blockSize   number of input samples to process per call.
4585    * @return none.
4586    */
4587 
4588   void arm_fir_sparse_q31(
4589   arm_fir_sparse_instance_q31 * S,
4590   q31_t * pSrc,
4591   q31_t * pDst,
4592   q31_t * pScratchIn,
4593   uint32_t blockSize);
4594 
4595   /**
4596    * @brief  Initialization function for the Q31 sparse FIR filter.
4597    * @param[in,out] *S         points to an instance of the Q31 sparse FIR structure.
4598    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4599    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4600    * @param[in]     *pState    points to the state buffer.
4601    * @param[in]     *pTapDelay points to the array of offset times.
4602    * @param[in]     maxDelay   maximum offset time supported.
4603    * @param[in]     blockSize  number of samples that will be processed per block.
4604    * @return none.
4605    */
4606 
4607   void arm_fir_sparse_init_q31(
4608   arm_fir_sparse_instance_q31 * S,
4609   uint16_t numTaps,
4610   q31_t * pCoeffs,
4611   q31_t * pState,
4612   int32_t * pTapDelay,
4613   uint16_t maxDelay,
4614   uint32_t blockSize);
4615 
4616   /**
4617    * @brief Processing function for the Q15 sparse FIR filter.
4618    * @param[in]  *S           points to an instance of the Q15 sparse FIR structure.
4619    * @param[in]  *pSrc        points to the block of input data.
4620    * @param[out] *pDst        points to the block of output data.
4621    * @param[in]  *pScratchIn  points to a temporary q15_t buffer of size blockSize.
4622    * @param[in]  *pScratchOut points to a temporary q31_t buffer of size blockSize.
4623    * @param[in]  blockSize    number of input samples to process per call.
4624    * @return none.
4625    */
4626 
4627   void arm_fir_sparse_q15(
4628   arm_fir_sparse_instance_q15 * S,
4629   q15_t * pSrc,
4630   q15_t * pDst,
4631   q15_t * pScratchIn,
4632   q31_t * pScratchOut,
4633   uint32_t blockSize);
4634 
4635 
4636   /**
4637    * @brief  Initialization function for the Q15 sparse FIR filter.
4638    * @param[in,out] *S         points to an instance of the Q15 sparse FIR structure.
4639    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4640    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4641    * @param[in]     *pState    points to the state buffer.
4642    * @param[in]     *pTapDelay points to the array of offset times.
4643    * @param[in]     maxDelay   maximum offset time supported.
4644    * @param[in]     blockSize  number of samples that will be processed per block.
4645    * @return none.
4646    */
4647 
4648   void arm_fir_sparse_init_q15(
4649   arm_fir_sparse_instance_q15 * S,
4650   uint16_t numTaps,
4651   q15_t * pCoeffs,
4652   q15_t * pState,
4653   int32_t * pTapDelay,
4654   uint16_t maxDelay,
4655   uint32_t blockSize);
4656 
4657   /**
4658    * @brief Processing function for the Q7 sparse FIR filter.
4659    * @param[in]  *S           points to an instance of the Q7 sparse FIR structure.
4660    * @param[in]  *pSrc        points to the block of input data.
4661    * @param[out] *pDst        points to the block of output data.
4662    * @param[in]  *pScratchIn  points to a temporary q7_t buffer of size blockSize.
4663    * @param[in]  *pScratchOut points to a temporary q31_t buffer of size blockSize.
4664    * @param[in]  blockSize    number of input samples to process per call.
4665    * @return none.
4666    */
4667 
4668   void arm_fir_sparse_q7(
4669   arm_fir_sparse_instance_q7 * S,
4670   q7_t * pSrc,
4671   q7_t * pDst,
4672   q7_t * pScratchIn,
4673   q31_t * pScratchOut,
4674   uint32_t blockSize);
4675 
4676   /**
4677    * @brief  Initialization function for the Q7 sparse FIR filter.
4678    * @param[in,out] *S         points to an instance of the Q7 sparse FIR structure.
4679    * @param[in]     numTaps    number of nonzero coefficients in the filter.
4680    * @param[in]     *pCoeffs   points to the array of filter coefficients.
4681    * @param[in]     *pState    points to the state buffer.
4682    * @param[in]     *pTapDelay points to the array of offset times.
4683    * @param[in]     maxDelay   maximum offset time supported.
4684    * @param[in]     blockSize  number of samples that will be processed per block.
4685    * @return none.
4686    */
4687 
4688   void arm_fir_sparse_init_q7(
4689   arm_fir_sparse_instance_q7 * S,
4690   uint16_t numTaps,
4691   q7_t * pCoeffs,
4692   q7_t * pState,
4693   int32_t * pTapDelay,
4694   uint16_t maxDelay,
4695   uint32_t blockSize);
4696 
4697 
4698   /*
4699    * @brief  Floating-point sin_cos function.
4700    * @param[in]  theta    input value in degrees.
4701    * @param[out] *pSinVal points to the processed sine output.
4702    * @param[out] *pCosVal points to the processed cosine output.
4703    * @return none.
4704    */
4705 
4706   void arm_sin_cos_f32(
4707   float32_t theta,
4708   float32_t * pSinVal,
4709   float32_t * pCosVal);
4710 
4711   /*
4712    * @brief  Q31 sin_cos function.
4713    * @param[in]  theta    scaled input value in degrees.
4714    * @param[out] *pSinVal points to the processed sine output.
4715    * @param[out] *pCosVal points to the processed cosine output.
4716    * @return none.
4717    */
4718 
4719   void arm_sin_cos_q31(
4720   q31_t theta,
4721   q31_t * pSinVal,
4722   q31_t * pCosVal);
4723 
4724 
4725   /**
4726    * @brief  Floating-point complex conjugate.
4727    * @param[in]  *pSrc points to the input vector.
4728    * @param[out] *pDst points to the output vector.
4729    * @param[in]  numSamples number of complex samples in each vector.
4730    * @return none.
4731    */
4732 
4733   void arm_cmplx_conj_f32(
4734   float32_t * pSrc,
4735   float32_t * pDst,
4736   uint32_t numSamples);
4737 
4738   /**
4739    * @brief  Q31 complex conjugate.
4740    * @param[in]  *pSrc points to the input vector.
4741    * @param[out] *pDst points to the output vector.
4742    * @param[in]  numSamples number of complex samples in each vector.
4743    * @return none.
4744    */
4745 
4746   void arm_cmplx_conj_q31(
4747   q31_t * pSrc,
4748   q31_t * pDst,
4749   uint32_t numSamples);
4750 
4751   /**
4752    * @brief  Q15 complex conjugate.
4753    * @param[in]  *pSrc points to the input vector.
4754    * @param[out] *pDst points to the output vector.
4755    * @param[in]  numSamples number of complex samples in each vector.
4756    * @return none.
4757    */
4758 
4759   void arm_cmplx_conj_q15(
4760   q15_t * pSrc,
4761   q15_t * pDst,
4762   uint32_t numSamples);
4763 
4764 
4765 
4766   /**
4767    * @brief  Floating-point complex magnitude squared.
4768    * @param[in]  *pSrc points to the complex input vector.
4769    * @param[out] *pDst points to the real output vector.
4770    * @param[in]  numSamples number of complex samples in the input vector.
4771    * @return none.
4772    */
4773 
4774   void arm_cmplx_mag_squared_f32(
4775   float32_t * pSrc,
4776   float32_t * pDst,
4777   uint32_t numSamples);
4778 
4779   /**
4780    * @brief  Q31 complex magnitude squared.
4781    * @param[in]  *pSrc points to the complex input vector.
4782    * @param[out] *pDst points to the real output vector.
4783    * @param[in]  numSamples number of complex samples in the input vector.
4784    * @return none.
4785    */
4786 
4787   void arm_cmplx_mag_squared_q31(
4788   q31_t * pSrc,
4789   q31_t * pDst,
4790   uint32_t numSamples);
4791 
4792   /**
4793    * @brief  Q15 complex magnitude squared.
4794    * @param[in]  *pSrc points to the complex input vector.
4795    * @param[out] *pDst points to the real output vector.
4796    * @param[in]  numSamples number of complex samples in the input vector.
4797    * @return none.
4798    */
4799 
4800   void arm_cmplx_mag_squared_q15(
4801   q15_t * pSrc,
4802   q15_t * pDst,
4803   uint32_t numSamples);
4804 
4805 
4806  /**
4807    * @ingroup groupController
4808    */
4809 
4810   /**
4811    * @defgroup PID PID Motor Control
4812    *
4813    * A Proportional Integral Derivative (PID) controller is a generic feedback control
4814    * loop mechanism widely used in industrial control systems.
4815    * A PID controller is the most commonly used type of feedback controller.
4816    *
4817    * This set of functions implements (PID) controllers
4818    * for Q15, Q31, and floating-point data types.  The functions operate on a single sample
4819    * of data and each call to the function returns a single processed value.
4820    * <code>S</code> points to an instance of the PID control data structure.  <code>in</code>
4821    * is the input sample value. The functions return the output value.
4822    *
4823    * \par Algorithm:
4824    * <pre>
4825    *    y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2]
4826    *    A0 = Kp + Ki + Kd
4827    *    A1 = (-Kp ) - (2 * Kd )
4828    *    A2 = Kd  </pre>
4829    *
4830    * \par
4831    * where \c Kp is proportional constant, \c Ki is Integral constant and \c Kd is Derivative constant
4832    *
4833    * \par
4834    * \image html PID.gif "Proportional Integral Derivative Controller"
4835    *
4836    * \par
4837    * The PID controller calculates an "error" value as the difference between
4838    * the measured output and the reference input.
4839    * The controller attempts to minimize the error by adjusting the process control inputs.
4840    * The proportional value determines the reaction to the current error,
4841    * the integral value determines the reaction based on the sum of recent errors,
4842    * and the derivative value determines the reaction based on the rate at which the error has been changing.
4843    *
4844    * \par Instance Structure
4845    * The Gains A0, A1, A2 and state variables for a PID controller are stored together in an instance data structure.
4846    * A separate instance structure must be defined for each PID Controller.
4847    * There are separate instance structure declarations for each of the 3 supported data types.
4848    *
4849    * \par Reset Functions
4850    * There is also an associated reset function for each data type which clears the state array.
4851    *
4852    * \par Initialization Functions
4853    * There is also an associated initialization function for each data type.
4854    * The initialization function performs the following operations:
4855    * - Initializes the gains A0, A1 and A2 from the Kp, Ki and Kd gains.
4856    * - Zeros out the values in the state buffer.
4857    *
4858    * \par
4859    * The instance structure cannot be placed into a const data section; it is recommended to use the initialization function.
4860    *
4861    * \par Fixed-Point Behavior
4862    * Care must be taken when using the fixed-point versions of the PID Controller functions.
4863    * In particular, the overflow and saturation behavior of the accumulator used in each function must be considered.
4864    * Refer to the function specific documentation below for usage guidelines.
4865    */
4866 
4867   /**
4868    * @addtogroup PID
4869    * @{
4870    */
4871 
4872   /**
4873    * @brief  Process function for the floating-point PID Control.
4874    * @param[in,out] *S points to an instance of the floating-point PID Control structure; state[0..2] hold x[n-1], x[n-2] and y[n-1] and are updated.
4875    * @param[in] in input sample to process (x[n]).
4876    * @return out processed output sample (y[n]).
4877    */
4878 
4879 
4880   static __INLINE float32_t arm_pid_f32(
4881   arm_pid_instance_f32 * S,
4882   float32_t in)
4883   {
4884     float32_t out;
4885 
4886     /* y[n] = y[n-1] + A0 * x[n] + A1 * x[n-1] + A2 * x[n-2], with y[n-1] read from state[2] */
4887     out = (S->A0 * in) +
4888       (S->A1 * S->state[0]) + (S->A2 * S->state[1]) + (S->state[2]);
4889 
4890     /* Shift the input history (x[n-1] becomes x[n-2], x[n] becomes x[n-1]) and store y[n] as the next y[n-1] */
4891     S->state[1] = S->state[0];
4892     S->state[0] = in;
4893     S->state[2] = out;
4894 
4895     /* Hand the new output sample back to the caller */
4896     return (out);
4897 
4898   }
4899 
4900   /**
4901    * @brief  Process function for the Q31 PID Control.
4902    * @param[in,out] *S points to an instance of the Q31 PID Control structure; state[0..2] hold x[n-1], x[n-2] and y[n-1] and are updated.
4903    * @param[in] in input sample to process (x[n]).
4904    * @return out processed output sample (y[n]).
4905    *
4906    * <b>Scaling and Overflow Behavior:</b>
4907    * \par
4908    * The function is implemented using an internal 64-bit accumulator.
4909    * The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
4910    * Thus, if the accumulator result overflows it wraps around rather than clips.
4911    * In order to avoid overflows completely the input signal must be scaled down by 2 bits as there are four additions.
4912    * After all multiply-accumulates are performed, the 2.62 accumulator is truncated to 1.32 format and then saturated to 1.31 format. NOTE(review): the body shifts right by 31, casts to q31_t and adds y[n-1] with a plain +=; no explicit saturation is visible here - confirm intended behavior.
4913    */
4914 
4915   static __INLINE q31_t arm_pid_q31(
4916   arm_pid_instance_q31 * S,
4917   q31_t in)
4918   {
4919     q63_t acc;
4920     q31_t out;
4921 
4922     /* acc = A0 * x[n], computed in 64 bits */
4923     acc = (q63_t) S->A0 * in;
4924 
4925     /* acc += A1 * x[n-1] (state[0]) */
4926     acc += (q63_t) S->A1 * S->state[0];
4927 
4928     /* acc += A2 * x[n-2] (state[1]) */
4929     acc += (q63_t) S->A2 * S->state[1];
4930 
4931     /* drop 31 fractional bits and narrow to q31_t so y[n-1] can be added */
4932     out = (q31_t) (acc >> 31u);
4933 
4934     /* out += y[n-1] (state[2]); plain 32-bit addition */
4935     out += S->state[2];
4936 
4937     /* Shift the input history and store y[n] as the next y[n-1] */
4938     S->state[1] = S->state[0];
4939     S->state[0] = in;
4940     S->state[2] = out;
4941 
4942     /* Hand the new output sample back to the caller */
4943     return (out);
4944 
4945   }
4946 
4947   /**
4948    * @brief  Process function for the Q15 PID Control.
4949    * @param[in,out] *S points to an instance of the Q15 PID Control structure; state[0..2] hold x[n-1], x[n-2] and y[n-1] and are updated.
4950    * @param[in] in input sample to process (x[n]).
4951    * @return out processed output sample (y[n]).
4952    *
4953    * <b>Scaling and Overflow Behavior:</b>
4954    * \par
4955    * The function is implemented using a 64-bit internal accumulator.
4956    * Both Gains and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
4957    * The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
4958    * There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
4959    * After all additions have been performed, the accumulator is truncated to 34.15 format by discarding low 15 bits.
4960    * Lastly, the accumulator is saturated to yield a result in 1.15 format.
4961    */
4962 
4963   static __INLINE q15_t arm_pid_q15(
4964   arm_pid_instance_q15 * S,
4965   q15_t in)
4966   {
4967     q63_t acc;
4968     q15_t out;
4969 
4970 #ifndef ARM_MATH_CM0_FAMILY
4971     __SIMD32_TYPE *vstate;
4972 
4973     /* Implementation of PID controller using the dual 16-bit multiply intrinsics */
4974 
4975     /* acc = A0 * x[n]  */
4976     acc = (q31_t) __SMUAD(S->A0, in);
4977 
4978     /* acc += A1 * x[n-1] + A2 * x[n-2]; state[0] and state[1] are read together as one 32-bit word */
4979     vstate = __SIMD32_CONST(S->state);
4980     acc = __SMLALD(S->A1, (q31_t) *vstate, acc);
4981 
4982 #else
4983     /* acc = A0 * x[n]  */
4984     acc = ((q31_t) S->A0) * in;
4985 
4986     /* acc += A1 * x[n-1] + A2 * x[n-2]  */
4987     acc += (q31_t) S->A1 * S->state[0];
4988     acc += (q31_t) S->A2 * S->state[1];
4989 
4990 #endif
4991 
4992     /* acc += y[n-1] scaled by 2^15; multiply instead of << so a negative y[n-1] is well defined */
4993     acc += (q31_t) S->state[2] * 32768;
4994 
4995     /* drop 15 fractional bits and saturate to 16 bits */
4996     out = (q15_t) (__SSAT((acc >> 15), 16));
4997 
4998     /* Shift the input history and store y[n] as the next y[n-1] */
4999     S->state[1] = S->state[0];
5000     S->state[0] = in;
5001     S->state[2] = out;
5002 
5003     /* Hand the new output sample back to the caller */
5004     return (out);
5005 
5006   }
5007 
5008   /**
5009    * @} end of PID group
5010    */
5011 
5012 
5013   /**
5014    * @brief Floating-point matrix inverse.
5015    * @param[in]  *src points to the instance of the input floating-point matrix structure.
5016    * @param[out] *dst points to the instance of the output floating-point matrix structure.
5017    * @return ARM_MATH_SIZE_MISMATCH if the dimensions do not match.
5018    * If the input matrix is singular (does not have an inverse), the algorithm terminates and returns the error status ARM_MATH_SINGULAR.
5019    */
5020 
5021   arm_status arm_mat_inverse_f32(
5022   const arm_matrix_instance_f32 * src,
5023   arm_matrix_instance_f32 * dst);
5024 
5025 
5026 
5027   /**
5028    * @ingroup groupController
5029    */
5030 
5031 
5032   /**
5033    * @defgroup clarke Vector Clarke Transform
5034    * Forward Clarke transform converts the instantaneous stator phases into a two-coordinate time invariant vector.
5035    * Generally the Clarke transform uses three-phase currents <code>Ia, Ib and Ic</code> to calculate currents
5036    * in the two-phase orthogonal stator axis <code>Ialpha</code> and <code>Ibeta</code>.
5037    * When <code>Ialpha</code> is superposed with <code>Ia</code> as shown in the figure below
5038    * \image html clarke.gif Stator current space vector and its components in (a,b).
5039    * and <code>Ia + Ib + Ic = 0</code>, in this condition <code>Ialpha</code> and <code>Ibeta</code>
5040    * can be calculated using only <code>Ia</code> and <code>Ib</code>.
5041    *
5042    * The function operates on a single sample of data and each call to the function returns the processed output.
5043    * The library provides separate functions for Q31 and floating-point data types.
5044    * \par Algorithm
5045    * \image html clarkeFormula.gif
5046    * where <code>Ia</code> and <code>Ib</code> are the instantaneous stator phases and
5047    * <code>pIalpha</code> and <code>pIbeta</code> are the two coordinates of time invariant vector.
5048    * \par Fixed-Point Behavior
5049    * Care must be taken when using the Q31 version of the Clarke transform.
5050    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5051    * Refer to the function specific documentation below for usage guidelines.
5052    */
5053 
5054   /**
5055    * @addtogroup clarke
5056    * @{
5057    */
5058 
5059   /**
5060    *
5061    * @brief  Floating-point Clarke transform
5062    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5063    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5064    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5065    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5066    * @return none.
5067    */
5068 
5069   static __INLINE void arm_clarke_f32(
5070   float32_t Ia,
5071   float32_t Ib,
5072   float32_t * pIalpha,
5073   float32_t * pIbeta)
5074   {
5075     /* Calculate pIalpha using the equation, pIalpha = Ia */
5076     *pIalpha = Ia;
5077 
5078     /* Calculate pIbeta using the equation, pIbeta = (1/sqrt(3)) * Ia + (2/sqrt(3)) * Ib */
5079     *pIbeta =
5080       ((float32_t) 0.57735026919 * Ia + (float32_t) 1.15470053838 * Ib);
5081 
5082   }
5083 
5084   /**
5085    * @brief  Clarke transform for Q31 version
5086    * @param[in]       Ia       input three-phase coordinate <code>a</code>
5087    * @param[in]       Ib       input three-phase coordinate <code>b</code>
5088    * @param[out]      *pIalpha points to output two-phase orthogonal vector axis alpha
5089    * @param[out]      *pIbeta  points to output two-phase orthogonal vector axis beta
5090    * @return none.
5091    *
5092    * <b>Scaling and Overflow Behavior:</b>
5093    * \par
5094    * The function is implemented using an internal 32-bit accumulator.
5095    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5096    * There is saturation on the addition, hence there is no risk of overflow.
5097    */
5098 
5099   static __INLINE void arm_clarke_q31(
5100   q31_t Ia,
5101   q31_t Ib,
5102   q31_t * pIalpha,
5103   q31_t * pIbeta)
5104   {
5105     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5106 
5107     /* Calculating pIalpha from Ia by equation pIalpha = Ia */
5108     *pIalpha = Ia;
5109 
5110     /* Intermediate product is calculated by (1/(sqrt(3)) * Ia) */
5111     product1 = (q31_t) (((q63_t) Ia * 0x24F34E8B) >> 30);
5112 
5113     /* Intermediate product is calculated by (2/sqrt(3) * Ib) */
5114     product2 = (q31_t) (((q63_t) Ib * 0x49E69D16) >> 30);
5115 
5116     /* pIbeta is calculated by adding the intermediate products */
5117     *pIbeta = __QADD(product1, product2);
5118   }
5119 
5120   /**
5121    * @} end of clarke group
5122    */
5123 
5124   /**
5125    * @brief  Converts the elements of the Q7 vector to Q31 vector.
5126    * @param[in]  *pSrc     input pointer
5127    * @param[out]  *pDst    output pointer
5128    * @param[in]  blockSize number of samples to process
5129    * @return none.
5130    */
5131   void arm_q7_to_q31(
5132   q7_t * pSrc,
5133   q31_t * pDst,
5134   uint32_t blockSize);
5135 
5136 
5137 
5138 
5139   /**
5140    * @ingroup groupController
5141    */
5142 
5143   /**
5144    * @defgroup inv_clarke Vector Inverse Clarke Transform
5145    * Inverse Clarke transform converts the two-coordinate time invariant vector into instantaneous stator phases.
5146    *
5147    * The function operates on a single sample of data and each call to the function returns the processed output.
5148    * The library provides separate functions for Q31 and floating-point data types.
5149    * \par Algorithm
5150    * \image html clarkeInvFormula.gif
5151    * where <code>pIa</code> and <code>pIb</code> are the instantaneous stator phases and
5152    * <code>Ialpha</code> and <code>Ibeta</code> are the two coordinates of time invariant vector.
5153    * \par Fixed-Point Behavior
5154    * Care must be taken when using the Q31 version of the Clarke transform.
5155    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5156    * Refer to the function specific documentation below for usage guidelines.
5157    */
5158 
5159   /**
5160    * @addtogroup inv_clarke
5161    * @{
5162    */
5163 
5164    /**
5165    * @brief  Floating-point Inverse Clarke transform
5166    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5167    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5168    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5169    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5170    * @return none.
5171    */
5172 
5173 
5174   static __INLINE void arm_inv_clarke_f32(
5175   float32_t Ialpha,
5176   float32_t Ibeta,
5177   float32_t * pIa,
5178   float32_t * pIb)
5179   {
5180     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5181     *pIa = Ialpha;
5182 
5183     /* Calculating pIb from Ialpha and Ibeta by equation pIb = -(1/2) * Ialpha + (sqrt(3)/2) * Ibeta */
5184     *pIb = -0.5 * Ialpha + (float32_t) 0.8660254039 *Ibeta;
5185 
5186   }
5187 
5188   /**
5189    * @brief  Inverse Clarke transform for Q31 version
5190    * @param[in]       Ialpha  input two-phase orthogonal vector axis alpha
5191    * @param[in]       Ibeta   input two-phase orthogonal vector axis beta
5192    * @param[out]      *pIa    points to output three-phase coordinate <code>a</code>
5193    * @param[out]      *pIb    points to output three-phase coordinate <code>b</code>
5194    * @return none.
5195    *
5196    * <b>Scaling and Overflow Behavior:</b>
5197    * \par
5198    * The function is implemented using an internal 32-bit accumulator.
5199    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5200    * There is saturation on the subtraction, hence there is no risk of overflow.
5201    */
5202 
5203   static __INLINE void arm_inv_clarke_q31(
5204   q31_t Ialpha,
5205   q31_t Ibeta,
5206   q31_t * pIa,
5207   q31_t * pIb)
5208   {
5209     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5210 
5211     /* Calculating pIa from Ialpha by equation pIa = Ialpha */
5212     *pIa = Ialpha;
5213 
5214     /* Intermediate product is calculated by (1/(2*sqrt(3)) * Ia) */
5215     product1 = (q31_t) (((q63_t) (Ialpha) * (0x40000000)) >> 31);
5216 
5217     /* Intermediate product is calculated by (1/sqrt(3) * pIb) */
5218     product2 = (q31_t) (((q63_t) (Ibeta) * (0x6ED9EBA1)) >> 31);
5219 
5220     /* pIb is calculated by subtracting the products */
5221     *pIb = __QSUB(product2, product1);
5222 
5223   }
5224 
5225   /**
5226    * @} end of inv_clarke group
5227    */
5228 
5229   /**
5230    * @brief  Converts the elements of the Q7 vector to Q15 vector.
5231    * @param[in]  *pSrc     input pointer
5232    * @param[out] *pDst     output pointer
5233    * @param[in]  blockSize number of samples to process
5234    * @return none.
5235    */
5236   void arm_q7_to_q15(
5237   q7_t * pSrc,
5238   q15_t * pDst,
5239   uint32_t blockSize);
5240 
5241 
5242 
5243   /**
5244    * @ingroup groupController
5245    */
5246 
5247   /**
5248    * @defgroup park Vector Park Transform
5249    *
5250    * Forward Park transform converts the input two-coordinate vector to flux and torque components.
5251    * The Park transform can be used to realize the transformation of the <code>Ialpha</code> and the <code>Ibeta</code> currents
5252    * from the stationary to the moving reference frame and control the spatial relationship between
5253    * the stator vector current and rotor flux vector.
5254    * If we consider the d axis aligned with the rotor flux, the diagram below shows the
5255    * current vector and the relationship from the two reference frames:
5256    * \image html park.gif "Stator current space vector and its component in (a,b) and in the d,q rotating reference frame"
5257    *
5258    * The function operates on a single sample of data and each call to the function returns the processed output.
5259    * The library provides separate functions for Q31 and floating-point data types.
5260    * \par Algorithm
5261    * \image html parkFormula.gif
5262    * where <code>Ialpha</code> and <code>Ibeta</code> are the stator vector components,
5263    * <code>pId</code> and <code>pIq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5264    * cosine and sine values of theta (rotor flux position).
5265    * \par Fixed-Point Behavior
5266    * Care must be taken when using the Q31 version of the Park transform.
5267    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5268    * Refer to the function specific documentation below for usage guidelines.
5269    */
5270 
5271   /**
5272    * @addtogroup park
5273    * @{
5274    */
5275 
5276   /**
5277    * @brief Floating-point Park transform
5278    * @param[in]       Ialpha input two-phase vector coordinate alpha
5279    * @param[in]       Ibeta  input two-phase vector coordinate beta
5280    * @param[out]      *pId   points to output	rotor reference frame d
5281    * @param[out]      *pIq   points to output	rotor reference frame q
5282    * @param[in]       sinVal sine value of rotation angle theta
5283    * @param[in]       cosVal cosine value of rotation angle theta
5284    * @return none.
5285    *
5286    * The function implements the forward Park transform.
5287    *
5288    */
5289 
5290   static __INLINE void arm_park_f32(
5291   float32_t Ialpha,
5292   float32_t Ibeta,
5293   float32_t * pId,
5294   float32_t * pIq,
5295   float32_t sinVal,
5296   float32_t cosVal)
5297   {
5298     /* Calculate pId using the equation, pId = Ialpha * cosVal + Ibeta * sinVal */
5299     *pId = Ialpha * cosVal + Ibeta * sinVal;
5300 
5301     /* Calculate pIq using the equation, pIq = - Ialpha * sinVal + Ibeta * cosVal */
5302     *pIq = -Ialpha * sinVal + Ibeta * cosVal;
5303 
5304   }
5305 
5306   /**
5307    * @brief  Park transform for Q31 version
5308    * @param[in]       Ialpha input two-phase vector coordinate alpha
5309    * @param[in]       Ibeta  input two-phase vector coordinate beta
5310    * @param[out]      *pId   points to output rotor reference frame d
5311    * @param[out]      *pIq   points to output rotor reference frame q
5312    * @param[in]       sinVal sine value of rotation angle theta
5313    * @param[in]       cosVal cosine value of rotation angle theta
5314    * @return none.
5315    *
5316    * <b>Scaling and Overflow Behavior:</b>
5317    * \par
5318    * The function is implemented using an internal 32-bit accumulator.
5319    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
5320    * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5321    */
5322 
5323 
5324   static __INLINE void arm_park_q31(
5325   q31_t Ialpha,
5326   q31_t Ibeta,
5327   q31_t * pId,
5328   q31_t * pIq,
5329   q31_t sinVal,
5330   q31_t cosVal)
5331   {
5332     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5333     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5334 
5335     /* Intermediate product is calculated by (Ialpha * cosVal) */
5336     product1 = (q31_t) (((q63_t) (Ialpha) * (cosVal)) >> 31);
5337 
5338     /* Intermediate product is calculated by (Ibeta * sinVal) */
5339     product2 = (q31_t) (((q63_t) (Ibeta) * (sinVal)) >> 31);
5340 
5341 
5342     /* Intermediate product is calculated by (Ialpha * sinVal) */
5343     product3 = (q31_t) (((q63_t) (Ialpha) * (sinVal)) >> 31);
5344 
5345     /* Intermediate product is calculated by (Ibeta * cosVal) */
5346     product4 = (q31_t) (((q63_t) (Ibeta) * (cosVal)) >> 31);
5347 
5348     /* Calculate pId by adding the two intermediate products 1 and 2 */
5349     *pId = __QADD(product1, product2);
5350 
5351     /* Calculate pIq by subtracting the two intermediate products 3 from 4 */
5352     *pIq = __QSUB(product4, product3);
5353   }
5354 
5355   /**
5356    * @} end of park group
5357    */
5358 
5359   /**
5360    * @brief  Converts the elements of the Q7 vector to floating-point vector.
5361    * @param[in]  *pSrc is input pointer
5362    * @param[out]  *pDst is output pointer
5363    * @param[in]  blockSize is the number of samples to process
5364    * @return none.
5365    */
5366   void arm_q7_to_float(
5367   q7_t * pSrc,
5368   float32_t * pDst,
5369   uint32_t blockSize);
5370 
5371 
5372   /**
5373    * @ingroup groupController
5374    */
5375 
5376   /**
5377    * @defgroup inv_park Vector Inverse Park transform
5378    * Inverse Park transform converts the input flux and torque components to two-coordinate vector.
5379    *
5380    * The function operates on a single sample of data and each call to the function returns the processed output.
5381    * The library provides separate functions for Q31 and floating-point data types.
5382    * \par Algorithm
5383    * \image html parkInvFormula.gif
5384    * where <code>pIalpha</code> and <code>pIbeta</code> are the stator vector components,
5385    * <code>Id</code> and <code>Iq</code> are rotor vector components and <code>cosVal</code> and <code>sinVal</code> are the
5386    * cosine and sine values of theta (rotor flux position).
5387    * \par Fixed-Point Behavior
5388    * Care must be taken when using the Q31 version of the Park transform.
5389    * In particular, the overflow and saturation behavior of the accumulator used must be considered.
5390    * Refer to the function specific documentation below for usage guidelines.
5391    */
5392 
5393   /**
5394    * @addtogroup inv_park
5395    * @{
5396    */
5397 
5398    /**
5399    * @brief  Floating-point Inverse Park transform
5400    * @param[in]       Id        input coordinate of rotor reference frame d
5401    * @param[in]       Iq        input coordinate of rotor reference frame q
5402    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5403    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5404    * @param[in]       sinVal    sine value of rotation angle theta
5405    * @param[in]       cosVal    cosine value of rotation angle theta
5406    * @return none.
5407    */
5408 
5409   static __INLINE void arm_inv_park_f32(
5410   float32_t Id,
5411   float32_t Iq,
5412   float32_t * pIalpha,
5413   float32_t * pIbeta,
5414   float32_t sinVal,
5415   float32_t cosVal)
5416   {
5417     /* Calculate pIalpha using the equation, pIalpha = Id * cosVal - Iq * sinVal */
5418     *pIalpha = Id * cosVal - Iq * sinVal;
5419 
5420     /* Calculate pIbeta using the equation, pIbeta = Id * sinVal + Iq * cosVal */
5421     *pIbeta = Id * sinVal + Iq * cosVal;
5422 
5423   }
5424 
5425 
5426   /**
5427    * @brief  Inverse Park transform for	Q31 version
5428    * @param[in]       Id        input coordinate of rotor reference frame d
5429    * @param[in]       Iq        input coordinate of rotor reference frame q
5430    * @param[out]      *pIalpha  points to output two-phase orthogonal vector axis alpha
5431    * @param[out]      *pIbeta   points to output two-phase orthogonal vector axis beta
5432    * @param[in]       sinVal    sine value of rotation angle theta
5433    * @param[in]       cosVal    cosine value of rotation angle theta
5434    * @return none.
5435    *
5436    * <b>Scaling and Overflow Behavior:</b>
5437    * \par
5438    * The function is implemented using an internal 32-bit accumulator.
5439    * The accumulator maintains 1.31 format by truncating lower 31 bits of the intermediate multiplication in 2.62 format.
   * There is saturation on the addition and subtraction, hence there is no risk of overflow.
5441    */
5442 
5443 
5444   static __INLINE void arm_inv_park_q31(
5445   q31_t Id,
5446   q31_t Iq,
5447   q31_t * pIalpha,
5448   q31_t * pIbeta,
5449   q31_t sinVal,
5450   q31_t cosVal)
5451   {
5452     q31_t product1, product2;                    /* Temporary variables used to store intermediate results */
5453     q31_t product3, product4;                    /* Temporary variables used to store intermediate results */
5454 
5455     /* Intermediate product is calculated by (Id * cosVal) */
5456     product1 = (q31_t) (((q63_t) (Id) * (cosVal)) >> 31);
5457 
5458     /* Intermediate product is calculated by (Iq * sinVal) */
5459     product2 = (q31_t) (((q63_t) (Iq) * (sinVal)) >> 31);
5460 
5461 
5462     /* Intermediate product is calculated by (Id * sinVal) */
5463     product3 = (q31_t) (((q63_t) (Id) * (sinVal)) >> 31);
5464 
5465     /* Intermediate product is calculated by (Iq * cosVal) */
5466     product4 = (q31_t) (((q63_t) (Iq) * (cosVal)) >> 31);
5467 
5468     /* Calculate pIalpha by using the two intermediate products 1 and 2 */
5469     *pIalpha = __QSUB(product1, product2);
5470 
5471     /* Calculate pIbeta by using the two intermediate products 3 and 4 */
5472     *pIbeta = __QADD(product4, product3);
5473 
5474   }
5475 
5476   /**
5477    * @} end of Inverse park group
5478    */
5479 
5480 
5481   /**
5482    * @brief  Converts the elements of the Q31 vector to floating-point vector.
5483    * @param[in]  *pSrc is input pointer
5484    * @param[out]  *pDst is output pointer
5485    * @param[in]  blockSize is the number of samples to process
5486    * @return none.
5487    */
5488   void arm_q31_to_float(
5489   q31_t * pSrc,
5490   float32_t * pDst,
5491   uint32_t blockSize);
5492 
5493   /**
5494    * @ingroup groupInterpolation
5495    */
5496 
5497   /**
5498    * @defgroup LinearInterpolate Linear Interpolation
5499    *
5500    * Linear interpolation is a method of curve fitting using linear polynomials.
5501    * Linear interpolation works by effectively drawing a straight line between two neighboring samples and returning the appropriate point along that line
5502    *
5503    * \par
5504    * \image html LinearInterp.gif "Linear interpolation"
5505    *
5506    * \par
   * A linear interpolation function calculates an output value (y) for the input (x)
   * using the nearest input values x0 and x1 and the corresponding nearest output values y0 and y1.
5509    *
5510    * \par Algorithm:
5511    * <pre>
5512    *       y = y0 + (x - x0) * ((y1 - y0)/(x1-x0))
5513    *       where x0, x1 are nearest values of input x
5514    *             y0, y1 are nearest values to output y
5515    * </pre>
5516    *
5517    * \par
5518    * This set of functions implements Linear interpolation process
5519    * for Q7, Q15, Q31, and floating-point data types.  The functions operate on a single
5520    * sample of data and each call to the function returns a single processed value.
5521    * <code>S</code> points to an instance of the Linear Interpolate function data structure.
5522    * <code>x</code> is the input sample value. The functions returns the output value.
5523    *
5524    * \par
   * If x is outside the table boundary, linear interpolation returns the first value of the table
   * when x is below the input range and the last value of the table when x is above the range.
5527    */
5528 
5529   /**
5530    * @addtogroup LinearInterpolate
5531    * @{
5532    */
5533 
5534   /**
5535    * @brief  Process function for the floating-point Linear Interpolation Function.
5536    * @param[in,out] *S is an instance of the floating-point Linear Interpolation structure
5537    * @param[in] x input sample to process
5538    * @return y processed output sample.
5539    *
5540    */
5541 
5542   static __INLINE float32_t arm_linear_interp_f32(
5543   arm_linear_interp_instance_f32 * S,
5544   float32_t x)
5545   {
5546 
5547     float32_t y;
5548     float32_t x0, x1;                            /* Nearest input values */
5549     float32_t y0, y1;                            /* Nearest output values */
5550     float32_t xSpacing = S->xSpacing;            /* spacing between input values */
5551     int32_t i;                                   /* Index variable */
5552     float32_t *pYData = S->pYData;               /* pointer to output table */
5553 
5554     /* Calculation of index */
5555     i = (int32_t) ((x - S->x1) / xSpacing);
5556 
5557     if(i < 0)
5558     {
5559       /* Iniatilize output for below specified range as least output value of table */
5560       y = pYData[0];
5561     }
5562     else if((uint32_t)i >= S->nValues)
5563     {
5564       /* Iniatilize output for above specified range as last output value of table */
5565       y = pYData[S->nValues - 1];
5566     }
5567     else
5568     {
5569       /* Calculation of nearest input values */
5570       x0 = S->x1 + i * xSpacing;
5571       x1 = S->x1 + (i + 1) * xSpacing;
5572 
5573       /* Read of nearest output values */
5574       y0 = pYData[i];
5575       y1 = pYData[i + 1];
5576 
5577       /* Calculation of output */
5578       y = y0 + (x - x0) * ((y1 - y0) / (x1 - x0));
5579 
5580     }
5581 
5582     /* returns output value */
5583     return (y);
5584   }
5585 
5586    /**
5587    *
5588    * @brief  Process function for the Q31 Linear Interpolation Function.
5589    * @param[in] *pYData  pointer to Q31 Linear Interpolation table
5590    * @param[in] x input sample to process
5591    * @param[in] nValues number of table values
5592    * @return y processed output sample.
5593    *
5594    * \par
5595    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5596    * This function can support maximum of table size 2^12.
5597    *
5598    */
5599 
5600 
5601   static __INLINE q31_t arm_linear_interp_q31(
5602   q31_t * pYData,
5603   q31_t x,
5604   uint32_t nValues)
5605   {
5606     q31_t y;                                     /* output */
5607     q31_t y0, y1;                                /* Nearest output values */
5608     q31_t fract;                                 /* fractional part */
5609     int32_t index;                               /* Index to read nearest output values */
5610 
5611     /* Input is in 12.20 format */
5612     /* 12 bits for the table index */
5613     /* Index value calculation */
5614     index = ((x & 0xFFF00000) >> 20);
5615 
5616     if(index >= (int32_t)(nValues - 1))
5617     {
5618       return (pYData[nValues - 1]);
5619     }
5620     else if(index < 0)
5621     {
5622       return (pYData[0]);
5623     }
5624     else
5625     {
5626 
5627       /* 20 bits for the fractional part */
5628       /* shift left by 11 to keep fract in 1.31 format */
5629       fract = (x & 0x000FFFFF) << 11;
5630 
5631       /* Read two nearest output values from the index in 1.31(q31) format */
5632       y0 = pYData[index];
5633       y1 = pYData[index + 1u];
5634 
5635       /* Calculation of y0 * (1-fract) and y is in 2.30 format */
5636       y = ((q31_t) ((q63_t) y0 * (0x7FFFFFFF - fract) >> 32));
5637 
5638       /* Calculation of y0 * (1-fract) + y1 *fract and y is in 2.30 format */
5639       y += ((q31_t) (((q63_t) y1 * fract) >> 32));
5640 
5641       /* Convert y to 1.31 format */
5642       return (y << 1u);
5643 
5644     }
5645 
5646   }
5647 
5648   /**
5649    *
5650    * @brief  Process function for the Q15 Linear Interpolation Function.
5651    * @param[in] *pYData  pointer to Q15 Linear Interpolation table
5652    * @param[in] x input sample to process
5653    * @param[in] nValues number of table values
5654    * @return y processed output sample.
5655    *
5656    * \par
5657    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5658    * This function can support maximum of table size 2^12.
5659    *
5660    */
5661 
5662 
5663   static __INLINE q15_t arm_linear_interp_q15(
5664   q15_t * pYData,
5665   q31_t x,
5666   uint32_t nValues)
5667   {
5668     q63_t y;                                     /* output */
5669     q15_t y0, y1;                                /* Nearest output values */
5670     q31_t fract;                                 /* fractional part */
5671     int32_t index;                               /* Index to read nearest output values */
5672 
5673     /* Input is in 12.20 format */
5674     /* 12 bits for the table index */
5675     /* Index value calculation */
5676     index = ((x & 0xFFF00000) >> 20u);
5677 
5678     if(index >= (int32_t)(nValues - 1))
5679     {
5680       return (pYData[nValues - 1]);
5681     }
5682     else if(index < 0)
5683     {
5684       return (pYData[0]);
5685     }
5686     else
5687     {
5688       /* 20 bits for the fractional part */
5689       /* fract is in 12.20 format */
5690       fract = (x & 0x000FFFFF);
5691 
5692       /* Read two nearest output values from the index */
5693       y0 = pYData[index];
5694       y1 = pYData[index + 1u];
5695 
5696       /* Calculation of y0 * (1-fract) and y is in 13.35 format */
5697       y = ((q63_t) y0 * (0xFFFFF - fract));
5698 
5699       /* Calculation of (y0 * (1-fract) + y1 * fract) and y is in 13.35 format */
5700       y += ((q63_t) y1 * (fract));
5701 
5702       /* convert y to 1.15 format */
5703       return (y >> 20);
5704     }
5705 
5706 
5707   }
5708 
5709   /**
5710    *
5711    * @brief  Process function for the Q7 Linear Interpolation Function.
5712    * @param[in] *pYData  pointer to Q7 Linear Interpolation table
5713    * @param[in] x input sample to process
5714    * @param[in] nValues number of table values
5715    * @return y processed output sample.
5716    *
5717    * \par
5718    * Input sample <code>x</code> is in 12.20 format which contains 12 bits for table index and 20 bits for fractional part.
5719    * This function can support maximum of table size 2^12.
5720    */
5721 
5722 
5723   static __INLINE q7_t arm_linear_interp_q7(
5724   q7_t * pYData,
5725   q31_t x,
5726   uint32_t nValues)
5727   {
5728     q31_t y;                                     /* output */
5729     q7_t y0, y1;                                 /* Nearest output values */
5730     q31_t fract;                                 /* fractional part */
5731     uint32_t index;                              /* Index to read nearest output values */
5732 
5733     /* Input is in 12.20 format */
5734     /* 12 bits for the table index */
5735     /* Index value calculation */
5736     if (x < 0)
5737     {
5738       return (pYData[0]);
5739     }
5740     index = (x >> 20) & 0xfff;
5741 
5742 
5743     if(index >= (nValues - 1))
5744     {
5745       return (pYData[nValues - 1]);
5746     }
5747     else
5748     {
5749 
5750       /* 20 bits for the fractional part */
5751       /* fract is in 12.20 format */
5752       fract = (x & 0x000FFFFF);
5753 
5754       /* Read two nearest output values from the index and are in 1.7(q7) format */
5755       y0 = pYData[index];
5756       y1 = pYData[index + 1u];
5757 
5758       /* Calculation of y0 * (1-fract ) and y is in 13.27(q27) format */
5759       y = ((y0 * (0xFFFFF - fract)));
5760 
5761       /* Calculation of y1 * fract + y0 * (1-fract) and y is in 13.27(q27) format */
5762       y += (y1 * fract);
5763 
5764       /* convert y to 1.7(q7) format */
5765       return (y >> 20u);
5766 
5767     }
5768 
5769   }
5770   /**
5771    * @} end of LinearInterpolate group
5772    */
5773 
5774   /**
5775    * @brief  Fast approximation to the trigonometric sine function for floating-point data.
5776    * @param[in] x input value in radians.
5777    * @return  sin(x).
5778    */
5779 
5780   float32_t arm_sin_f32(
5781   float32_t x);
5782 
5783   /**
5784    * @brief  Fast approximation to the trigonometric sine function for Q31 data.
5785    * @param[in] x Scaled input value in radians.
5786    * @return  sin(x).
5787    */
5788 
5789   q31_t arm_sin_q31(
5790   q31_t x);
5791 
5792   /**
5793    * @brief  Fast approximation to the trigonometric sine function for Q15 data.
5794    * @param[in] x Scaled input value in radians.
5795    * @return  sin(x).
5796    */
5797 
5798   q15_t arm_sin_q15(
5799   q15_t x);
5800 
5801   /**
5802    * @brief  Fast approximation to the trigonometric cosine function for floating-point data.
5803    * @param[in] x input value in radians.
5804    * @return  cos(x).
5805    */
5806 
5807   float32_t arm_cos_f32(
5808   float32_t x);
5809 
5810   /**
5811    * @brief Fast approximation to the trigonometric cosine function for Q31 data.
5812    * @param[in] x Scaled input value in radians.
5813    * @return  cos(x).
5814    */
5815 
5816   q31_t arm_cos_q31(
5817   q31_t x);
5818 
5819   /**
5820    * @brief  Fast approximation to the trigonometric cosine function for Q15 data.
5821    * @param[in] x Scaled input value in radians.
5822    * @return  cos(x).
5823    */
5824 
5825   q15_t arm_cos_q15(
5826   q15_t x);
5827 
5828 
5829   /**
5830    * @ingroup groupFastMath
5831    */
5832 
5833 
5834   /**
5835    * @defgroup SQRT Square Root
5836    *
5837    * Computes the square root of a number.
5838    * There are separate functions for Q15, Q31, and floating-point data types.
5839    * The square root function is computed using the Newton-Raphson algorithm.
5840    * This is an iterative algorithm of the form:
5841    * <pre>
5842    *      x1 = x0 - f(x0)/f'(x0)
5843    * </pre>
5844    * where <code>x1</code> is the current estimate,
5845    * <code>x0</code> is the previous estimate, and
5846    * <code>f'(x0)</code> is the derivative of <code>f()</code> evaluated at <code>x0</code>.
5847    * For the square root function, the algorithm reduces to:
5848    * <pre>
5849    *     x0 = in/2                         [initial guess]
5850    *     x1 = 1/2 * ( x0 + in / x0)        [each iteration]
5851    * </pre>
5852    */
5853 
5854 
5855   /**
5856    * @addtogroup SQRT
5857    * @{
5858    */
5859 
5860   /**
5861    * @brief  Floating-point square root function.
5862    * @param[in]  in     input value.
5863    * @param[out] *pOut  square root of input value.
5864    * @return The function returns ARM_MATH_SUCCESS if input value is positive value or ARM_MATH_ARGUMENT_ERROR if
5865    * <code>in</code> is negative value and returns zero output for negative values.
5866    */
5867 
5868   static __INLINE arm_status arm_sqrt_f32(
5869   float32_t in,
5870   float32_t * pOut)
5871   {
5872     if(in > 0)
5873     {
5874 
5875 //      #if __FPU_USED
5876 #if (__FPU_USED == 1) && defined ( __CC_ARM   )
5877       *pOut = __sqrtf(in);
5878 #else
5879       *pOut = sqrtf(in);
5880 #endif
5881 
5882       return (ARM_MATH_SUCCESS);
5883     }
5884     else
5885     {
5886       *pOut = 0.0f;
5887       return (ARM_MATH_ARGUMENT_ERROR);
5888     }
5889 
5890   }
5891 
5892 
5893   /**
5894    * @brief Q31 square root function.
5895    * @param[in]   in    input value.  The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
5896    * @param[out]  *pOut square root of input value.
   * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
   * <code>in</code> is negative; for negative inputs the output is set to zero.
5899    */
5900   arm_status arm_sqrt_q31(
5901   q31_t in,
5902   q31_t * pOut);
5903 
5904   /**
5905    * @brief  Q15 square root function.
5906    * @param[in]   in     input value.  The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
5907    * @param[out]  *pOut  square root of input value.
   * @return The function returns ARM_MATH_SUCCESS if the input value is positive, or ARM_MATH_ARGUMENT_ERROR if
   * <code>in</code> is negative; for negative inputs the output is set to zero.
5910    */
5911   arm_status arm_sqrt_q15(
5912   q15_t in,
5913   q15_t * pOut);
5914 
5915   /**
5916    * @} end of SQRT group
5917    */
5918 
5919 
5920 
5921 
5922 
5923 
5924   /**
5925    * @brief floating-point Circular write function.
5926    */
5927 
5928   static __INLINE void arm_circularWrite_f32(
5929   int32_t * circBuffer,
5930   int32_t L,
5931   uint16_t * writeOffset,
5932   int32_t bufferInc,
5933   const int32_t * src,
5934   int32_t srcInc,
5935   uint32_t blockSize)
5936   {
5937     uint32_t i = 0u;
5938     int32_t wOffset;
5939 
5940     /* Copy the value of Index pointer that points
5941      * to the current location where the input samples to be copied */
5942     wOffset = *writeOffset;
5943 
5944     /* Loop over the blockSize */
5945     i = blockSize;
5946 
5947     while(i > 0u)
5948     {
5949       /* copy the input sample to the circular buffer */
5950       circBuffer[wOffset] = *src;
5951 
5952       /* Update the input pointer */
5953       src += srcInc;
5954 
5955       /* Circularly update wOffset.  Watch out for positive and negative value */
5956       wOffset += bufferInc;
5957       if(wOffset >= L)
5958         wOffset -= L;
5959 
5960       /* Decrement the loop counter */
5961       i--;
5962     }
5963 
5964     /* Update the index pointer */
5965     *writeOffset = wOffset;
5966   }
5967 
5968 
5969 
5970   /**
5971    * @brief floating-point Circular Read function.
5972    */
5973   static __INLINE void arm_circularRead_f32(
5974   int32_t * circBuffer,
5975   int32_t L,
5976   int32_t * readOffset,
5977   int32_t bufferInc,
5978   int32_t * dst,
5979   int32_t * dst_base,
5980   int32_t dst_length,
5981   int32_t dstInc,
5982   uint32_t blockSize)
5983   {
5984     uint32_t i = 0u;
5985     int32_t rOffset, dst_end;
5986 
5987     /* Copy the value of Index pointer that points
5988      * to the current location from where the input samples to be read */
5989     rOffset = *readOffset;
5990     dst_end = (int32_t) (dst_base + dst_length);
5991 
5992     /* Loop over the blockSize */
5993     i = blockSize;
5994 
5995     while(i > 0u)
5996     {
5997       /* copy the sample from the circular buffer to the destination buffer */
5998       *dst = circBuffer[rOffset];
5999 
6000       /* Update the input pointer */
6001       dst += dstInc;
6002 
6003       if(dst == (int32_t *) dst_end)
6004       {
6005         dst = dst_base;
6006       }
6007 
6008       /* Circularly update rOffset.  Watch out for positive and negative value  */
6009       rOffset += bufferInc;
6010 
6011       if(rOffset >= L)
6012       {
6013         rOffset -= L;
6014       }
6015 
6016       /* Decrement the loop counter */
6017       i--;
6018     }
6019 
6020     /* Update the index pointer */
6021     *readOffset = rOffset;
6022   }
6023 
6024   /**
6025    * @brief Q15 Circular write function.
6026    */
6027 
6028   static __INLINE void arm_circularWrite_q15(
6029   q15_t * circBuffer,
6030   int32_t L,
6031   uint16_t * writeOffset,
6032   int32_t bufferInc,
6033   const q15_t * src,
6034   int32_t srcInc,
6035   uint32_t blockSize)
6036   {
6037     uint32_t i = 0u;
6038     int32_t wOffset;
6039 
6040     /* Copy the value of Index pointer that points
6041      * to the current location where the input samples to be copied */
6042     wOffset = *writeOffset;
6043 
6044     /* Loop over the blockSize */
6045     i = blockSize;
6046 
6047     while(i > 0u)
6048     {
6049       /* copy the input sample to the circular buffer */
6050       circBuffer[wOffset] = *src;
6051 
6052       /* Update the input pointer */
6053       src += srcInc;
6054 
6055       /* Circularly update wOffset.  Watch out for positive and negative value */
6056       wOffset += bufferInc;
6057       if(wOffset >= L)
6058         wOffset -= L;
6059 
6060       /* Decrement the loop counter */
6061       i--;
6062     }
6063 
6064     /* Update the index pointer */
6065     *writeOffset = wOffset;
6066   }
6067 
6068 
6069 
6070   /**
6071    * @brief Q15 Circular Read function.
6072    */
6073   static __INLINE void arm_circularRead_q15(
6074   q15_t * circBuffer,
6075   int32_t L,
6076   int32_t * readOffset,
6077   int32_t bufferInc,
6078   q15_t * dst,
6079   q15_t * dst_base,
6080   int32_t dst_length,
6081   int32_t dstInc,
6082   uint32_t blockSize)
6083   {
6084     uint32_t i = 0;
6085     int32_t rOffset, dst_end;
6086 
6087     /* Copy the value of Index pointer that points
6088      * to the current location from where the input samples to be read */
6089     rOffset = *readOffset;
6090 
6091     dst_end = (int32_t) (dst_base + dst_length);
6092 
6093     /* Loop over the blockSize */
6094     i = blockSize;
6095 
6096     while(i > 0u)
6097     {
6098       /* copy the sample from the circular buffer to the destination buffer */
6099       *dst = circBuffer[rOffset];
6100 
6101       /* Update the input pointer */
6102       dst += dstInc;
6103 
6104       if(dst == (q15_t *) dst_end)
6105       {
6106         dst = dst_base;
6107       }
6108 
6109       /* Circularly update wOffset.  Watch out for positive and negative value */
6110       rOffset += bufferInc;
6111 
6112       if(rOffset >= L)
6113       {
6114         rOffset -= L;
6115       }
6116 
6117       /* Decrement the loop counter */
6118       i--;
6119     }
6120 
6121     /* Update the index pointer */
6122     *readOffset = rOffset;
6123   }
6124 
6125 
6126   /**
6127    * @brief Q7 Circular write function.
6128    */
6129 
6130   static __INLINE void arm_circularWrite_q7(
6131   q7_t * circBuffer,
6132   int32_t L,
6133   uint16_t * writeOffset,
6134   int32_t bufferInc,
6135   const q7_t * src,
6136   int32_t srcInc,
6137   uint32_t blockSize)
6138   {
6139     uint32_t i = 0u;
6140     int32_t wOffset;
6141 
6142     /* Copy the value of Index pointer that points
6143      * to the current location where the input samples to be copied */
6144     wOffset = *writeOffset;
6145 
6146     /* Loop over the blockSize */
6147     i = blockSize;
6148 
6149     while(i > 0u)
6150     {
6151       /* copy the input sample to the circular buffer */
6152       circBuffer[wOffset] = *src;
6153 
6154       /* Update the input pointer */
6155       src += srcInc;
6156 
6157       /* Circularly update wOffset.  Watch out for positive and negative value */
6158       wOffset += bufferInc;
6159       if(wOffset >= L)
6160         wOffset -= L;
6161 
6162       /* Decrement the loop counter */
6163       i--;
6164     }
6165 
6166     /* Update the index pointer */
6167     *writeOffset = wOffset;
6168   }
6169 
6170 
6171 
6172   /**
6173    * @brief Q7 Circular Read function.
6174    */
6175   static __INLINE void arm_circularRead_q7(
6176   q7_t * circBuffer,
6177   int32_t L,
6178   int32_t * readOffset,
6179   int32_t bufferInc,
6180   q7_t * dst,
6181   q7_t * dst_base,
6182   int32_t dst_length,
6183   int32_t dstInc,
6184   uint32_t blockSize)
6185   {
6186     uint32_t i = 0;
6187     int32_t rOffset, dst_end;
6188 
6189     /* Copy the value of Index pointer that points
6190      * to the current location from where the input samples to be read */
6191     rOffset = *readOffset;
6192 
6193     dst_end = (int32_t) (dst_base + dst_length);
6194 
6195     /* Loop over the blockSize */
6196     i = blockSize;
6197 
6198     while(i > 0u)
6199     {
6200       /* copy the sample from the circular buffer to the destination buffer */
6201       *dst = circBuffer[rOffset];
6202 
6203       /* Update the input pointer */
6204       dst += dstInc;
6205 
6206       if(dst == (q7_t *) dst_end)
6207       {
6208         dst = dst_base;
6209       }
6210 
6211       /* Circularly update rOffset.  Watch out for positive and negative value */
6212       rOffset += bufferInc;
6213 
6214       if(rOffset >= L)
6215       {
6216         rOffset -= L;
6217       }
6218 
6219       /* Decrement the loop counter */
6220       i--;
6221     }
6222 
6223     /* Update the index pointer */
6224     *readOffset = rOffset;
6225   }
6226 
6227 
6228   /**
6229    * @brief  Sum of the squares of the elements of a Q31 vector.
6230    * @param[in]  *pSrc is input pointer
6231    * @param[in]  blockSize is the number of samples to process
6232    * @param[out]  *pResult is output value.
6233    * @return none.
6234    */
6235 
6236   void arm_power_q31(
6237   q31_t * pSrc,
6238   uint32_t blockSize,
6239   q63_t * pResult);
6240 
6241   /**
6242    * @brief  Sum of the squares of the elements of a floating-point vector.
6243    * @param[in]  *pSrc is input pointer
6244    * @param[in]  blockSize is the number of samples to process
6245    * @param[out]  *pResult is output value.
6246    * @return none.
6247    */
6248 
6249   void arm_power_f32(
6250   float32_t * pSrc,
6251   uint32_t blockSize,
6252   float32_t * pResult);
6253 
6254   /**
6255    * @brief  Sum of the squares of the elements of a Q15 vector.
6256    * @param[in]  *pSrc is input pointer
6257    * @param[in]  blockSize is the number of samples to process
6258    * @param[out]  *pResult is output value.
6259    * @return none.
6260    */
6261 
6262   void arm_power_q15(
6263   q15_t * pSrc,
6264   uint32_t blockSize,
6265   q63_t * pResult);
6266 
6267   /**
6268    * @brief  Sum of the squares of the elements of a Q7 vector.
6269    * @param[in]  *pSrc is input pointer
6270    * @param[in]  blockSize is the number of samples to process
6271    * @param[out]  *pResult is output value.
6272    * @return none.
6273    */
6274 
6275   void arm_power_q7(
6276   q7_t * pSrc,
6277   uint32_t blockSize,
6278   q31_t * pResult);
6279 
6280   /**
6281    * @brief  Mean value of a Q7 vector.
6282    * @param[in]  *pSrc is input pointer
6283    * @param[in]  blockSize is the number of samples to process
6284    * @param[out]  *pResult is output value.
6285    * @return none.
6286    */
6287 
6288   void arm_mean_q7(
6289   q7_t * pSrc,
6290   uint32_t blockSize,
6291   q7_t * pResult);
6292 
6293   /**
6294    * @brief  Mean value of a Q15 vector.
6295    * @param[in]  *pSrc is input pointer
6296    * @param[in]  blockSize is the number of samples to process
6297    * @param[out]  *pResult is output value.
6298    * @return none.
6299    */
6300   void arm_mean_q15(
6301   q15_t * pSrc,
6302   uint32_t blockSize,
6303   q15_t * pResult);
6304 
6305   /**
6306    * @brief  Mean value of a Q31 vector.
6307    * @param[in]  *pSrc is input pointer
6308    * @param[in]  blockSize is the number of samples to process
6309    * @param[out]  *pResult is output value.
6310    * @return none.
6311    */
6312   void arm_mean_q31(
6313   q31_t * pSrc,
6314   uint32_t blockSize,
6315   q31_t * pResult);
6316 
6317   /**
6318    * @brief  Mean value of a floating-point vector.
6319    * @param[in]  *pSrc is input pointer
6320    * @param[in]  blockSize is the number of samples to process
6321    * @param[out]  *pResult is output value.
6322    * @return none.
6323    */
6324   void arm_mean_f32(
6325   float32_t * pSrc,
6326   uint32_t blockSize,
6327   float32_t * pResult);
6328 
6329   /**
6330    * @brief  Variance of the elements of a floating-point vector.
6331    * @param[in]  *pSrc is input pointer
6332    * @param[in]  blockSize is the number of samples to process
6333    * @param[out]  *pResult is output value.
6334    * @return none.
6335    */
6336 
6337   void arm_var_f32(
6338   float32_t * pSrc,
6339   uint32_t blockSize,
6340   float32_t * pResult);
6341 
6342   /**
6343    * @brief  Variance of the elements of a Q31 vector.
6344    * @param[in]  *pSrc is input pointer
6345    * @param[in]  blockSize is the number of samples to process
6346    * @param[out]  *pResult is output value.
6347    * @return none.
6348    */
6349 
6350   void arm_var_q31(
6351   q31_t * pSrc,
6352   uint32_t blockSize,
6353   q63_t * pResult);
6354 
6355   /**
6356    * @brief  Variance of the elements of a Q15 vector.
6357    * @param[in]  *pSrc is input pointer
6358    * @param[in]  blockSize is the number of samples to process
6359    * @param[out]  *pResult is output value.
6360    * @return none.
6361    */
6362 
6363   void arm_var_q15(
6364   q15_t * pSrc,
6365   uint32_t blockSize,
6366   q31_t * pResult);
6367 
6368   /**
6369    * @brief  Root Mean Square of the elements of a floating-point vector.
6370    * @param[in]  *pSrc is input pointer
6371    * @param[in]  blockSize is the number of samples to process
6372    * @param[out]  *pResult is output value.
6373    * @return none.
6374    */
6375 
6376   void arm_rms_f32(
6377   float32_t * pSrc,
6378   uint32_t blockSize,
6379   float32_t * pResult);
6380 
6381   /**
6382    * @brief  Root Mean Square of the elements of a Q31 vector.
6383    * @param[in]  *pSrc is input pointer
6384    * @param[in]  blockSize is the number of samples to process
6385    * @param[out]  *pResult is output value.
6386    * @return none.
6387    */
6388 
6389   void arm_rms_q31(
6390   q31_t * pSrc,
6391   uint32_t blockSize,
6392   q31_t * pResult);
6393 
6394   /**
6395    * @brief  Root Mean Square of the elements of a Q15 vector.
6396    * @param[in]  *pSrc is input pointer
6397    * @param[in]  blockSize is the number of samples to process
6398    * @param[out]  *pResult is output value.
6399    * @return none.
6400    */
6401 
6402   void arm_rms_q15(
6403   q15_t * pSrc,
6404   uint32_t blockSize,
6405   q15_t * pResult);
6406 
6407   /**
6408    * @brief  Standard deviation of the elements of a floating-point vector.
6409    * @param[in]  *pSrc is input pointer
6410    * @param[in]  blockSize is the number of samples to process
6411    * @param[out]  *pResult is output value.
6412    * @return none.
6413    */
6414 
6415   void arm_std_f32(
6416   float32_t * pSrc,
6417   uint32_t blockSize,
6418   float32_t * pResult);
6419 
6420   /**
6421    * @brief  Standard deviation of the elements of a Q31 vector.
6422    * @param[in]  *pSrc is input pointer
6423    * @param[in]  blockSize is the number of samples to process
6424    * @param[out]  *pResult is output value.
6425    * @return none.
6426    */
6427 
6428   void arm_std_q31(
6429   q31_t * pSrc,
6430   uint32_t blockSize,
6431   q31_t * pResult);
6432 
6433   /**
6434    * @brief  Standard deviation of the elements of a Q15 vector.
6435    * @param[in]  *pSrc is input pointer
6436    * @param[in]  blockSize is the number of samples to process
6437    * @param[out]  *pResult is output value.
6438    * @return none.
6439    */
6440 
6441   void arm_std_q15(
6442   q15_t * pSrc,
6443   uint32_t blockSize,
6444   q15_t * pResult);
6445 
6446   /**
6447    * @brief  Floating-point complex magnitude
6448    * @param[in]  *pSrc points to the complex input vector
6449    * @param[out]  *pDst points to the real output vector
6450    * @param[in]  numSamples number of complex samples in the input vector
6451    * @return none.
6452    */
6453 
6454   void arm_cmplx_mag_f32(
6455   float32_t * pSrc,
6456   float32_t * pDst,
6457   uint32_t numSamples);
6458 
6459   /**
6460    * @brief  Q31 complex magnitude
6461    * @param[in]  *pSrc points to the complex input vector
6462    * @param[out]  *pDst points to the real output vector
6463    * @param[in]  numSamples number of complex samples in the input vector
6464    * @return none.
6465    */
6466 
6467   void arm_cmplx_mag_q31(
6468   q31_t * pSrc,
6469   q31_t * pDst,
6470   uint32_t numSamples);
6471 
6472   /**
6473    * @brief  Q15 complex magnitude
6474    * @param[in]  *pSrc points to the complex input vector
6475    * @param[out]  *pDst points to the real output vector
6476    * @param[in]  numSamples number of complex samples in the input vector
6477    * @return none.
6478    */
6479 
6480   void arm_cmplx_mag_q15(
6481   q15_t * pSrc,
6482   q15_t * pDst,
6483   uint32_t numSamples);
6484 
6485   /**
6486    * @brief  Q15 complex dot product
6487    * @param[in]  *pSrcA points to the first input vector
6488    * @param[in]  *pSrcB points to the second input vector
6489    * @param[in]  numSamples number of complex samples in each vector
6490    * @param[out]  *realResult real part of the result returned here
6491    * @param[out]  *imagResult imaginary part of the result returned here
6492    * @return none.
6493    */
6494 
6495   void arm_cmplx_dot_prod_q15(
6496   q15_t * pSrcA,
6497   q15_t * pSrcB,
6498   uint32_t numSamples,
6499   q31_t * realResult,
6500   q31_t * imagResult);
6501 
6502   /**
6503    * @brief  Q31 complex dot product
6504    * @param[in]  *pSrcA points to the first input vector
6505    * @param[in]  *pSrcB points to the second input vector
6506    * @param[in]  numSamples number of complex samples in each vector
6507    * @param[out]  *realResult real part of the result returned here
6508    * @param[out]  *imagResult imaginary part of the result returned here
6509    * @return none.
6510    */
6511 
6512   void arm_cmplx_dot_prod_q31(
6513   q31_t * pSrcA,
6514   q31_t * pSrcB,
6515   uint32_t numSamples,
6516   q63_t * realResult,
6517   q63_t * imagResult);
6518 
6519   /**
6520    * @brief  Floating-point complex dot product
6521    * @param[in]  *pSrcA points to the first input vector
6522    * @param[in]  *pSrcB points to the second input vector
6523    * @param[in]  numSamples number of complex samples in each vector
6524    * @param[out]  *realResult real part of the result returned here
6525    * @param[out]  *imagResult imaginary part of the result returned here
6526    * @return none.
6527    */
6528 
6529   void arm_cmplx_dot_prod_f32(
6530   float32_t * pSrcA,
6531   float32_t * pSrcB,
6532   uint32_t numSamples,
6533   float32_t * realResult,
6534   float32_t * imagResult);
6535 
6536   /**
6537    * @brief  Q15 complex-by-real multiplication
6538    * @param[in]  *pSrcCmplx points to the complex input vector
6539    * @param[in]  *pSrcReal points to the real input vector
6540    * @param[out]  *pCmplxDst points to the complex output vector
6541    * @param[in]  numSamples number of samples in each vector
6542    * @return none.
6543    */
6544 
6545   void arm_cmplx_mult_real_q15(
6546   q15_t * pSrcCmplx,
6547   q15_t * pSrcReal,
6548   q15_t * pCmplxDst,
6549   uint32_t numSamples);
6550 
6551   /**
6552    * @brief  Q31 complex-by-real multiplication
6553    * @param[in]  *pSrcCmplx points to the complex input vector
6554    * @param[in]  *pSrcReal points to the real input vector
6555    * @param[out]  *pCmplxDst points to the complex output vector
6556    * @param[in]  numSamples number of samples in each vector
6557    * @return none.
6558    */
6559 
6560   void arm_cmplx_mult_real_q31(
6561   q31_t * pSrcCmplx,
6562   q31_t * pSrcReal,
6563   q31_t * pCmplxDst,
6564   uint32_t numSamples);
6565 
6566   /**
6567    * @brief  Floating-point complex-by-real multiplication
6568    * @param[in]  *pSrcCmplx points to the complex input vector
6569    * @param[in]  *pSrcReal points to the real input vector
6570    * @param[out]  *pCmplxDst points to the complex output vector
6571    * @param[in]  numSamples number of samples in each vector
6572    * @return none.
6573    */
6574 
6575   void arm_cmplx_mult_real_f32(
6576   float32_t * pSrcCmplx,
6577   float32_t * pSrcReal,
6578   float32_t * pCmplxDst,
6579   uint32_t numSamples);
6580 
6581   /**
6582    * @brief  Minimum value of a Q7 vector.
6583    * @param[in]  *pSrc is input pointer
6584    * @param[in]  blockSize is the number of samples to process
6585    * @param[out]  *result is output pointer
   * @param[out]  *index is the array index of the minimum value in the input buffer.
6587    * @return none.
6588    */
6589 
6590   void arm_min_q7(
6591   q7_t * pSrc,
6592   uint32_t blockSize,
6593   q7_t * result,
6594   uint32_t * index);
6595 
6596   /**
6597    * @brief  Minimum value of a Q15 vector.
6598    * @param[in]  *pSrc is input pointer
6599    * @param[in]  blockSize is the number of samples to process
6600    * @param[out]  *pResult is output pointer
   * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6602    * @return none.
6603    */
6604 
6605   void arm_min_q15(
6606   q15_t * pSrc,
6607   uint32_t blockSize,
6608   q15_t * pResult,
6609   uint32_t * pIndex);
6610 
6611   /**
6612    * @brief  Minimum value of a Q31 vector.
6613    * @param[in]  *pSrc is input pointer
6614    * @param[in]  blockSize is the number of samples to process
6615    * @param[out]  *pResult is output pointer
6616    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6617    * @return none.
6618    */
6619   void arm_min_q31(
6620   q31_t * pSrc,
6621   uint32_t blockSize,
6622   q31_t * pResult,
6623   uint32_t * pIndex);
6624 
6625   /**
6626    * @brief  Minimum value of a floating-point vector.
6627    * @param[in]  *pSrc is input pointer
6628    * @param[in]  blockSize is the number of samples to process
6629    * @param[out]  *pResult is output pointer
6630    * @param[out]  *pIndex is the array index of the minimum value in the input buffer.
6631    * @return none.
6632    */
6633 
6634   void arm_min_f32(
6635   float32_t * pSrc,
6636   uint32_t blockSize,
6637   float32_t * pResult,
6638   uint32_t * pIndex);
6639 
6640 /**
6641  * @brief Maximum value of a Q7 vector.
6642  * @param[in]       *pSrc points to the input buffer
6643  * @param[in]       blockSize length of the input vector
6644  * @param[out]      *pResult maximum value returned here
6645  * @param[out]      *pIndex index of maximum value returned here
6646  * @return none.
6647  */
6648 
6649   void arm_max_q7(
6650   q7_t * pSrc,
6651   uint32_t blockSize,
6652   q7_t * pResult,
6653   uint32_t * pIndex);
6654 
6655 /**
6656  * @brief Maximum value of a Q15 vector.
6657  * @param[in]       *pSrc points to the input buffer
6658  * @param[in]       blockSize length of the input vector
6659  * @param[out]      *pResult maximum value returned here
6660  * @param[out]      *pIndex index of maximum value returned here
6661  * @return none.
6662  */
6663 
6664   void arm_max_q15(
6665   q15_t * pSrc,
6666   uint32_t blockSize,
6667   q15_t * pResult,
6668   uint32_t * pIndex);
6669 
6670 /**
6671  * @brief Maximum value of a Q31 vector.
6672  * @param[in]       *pSrc points to the input buffer
6673  * @param[in]       blockSize length of the input vector
6674  * @param[out]      *pResult maximum value returned here
6675  * @param[out]      *pIndex index of maximum value returned here
6676  * @return none.
6677  */
6678 
6679   void arm_max_q31(
6680   q31_t * pSrc,
6681   uint32_t blockSize,
6682   q31_t * pResult,
6683   uint32_t * pIndex);
6684 
6685 /**
6686  * @brief Maximum value of a floating-point vector.
6687  * @param[in]       *pSrc points to the input buffer
6688  * @param[in]       blockSize length of the input vector
6689  * @param[out]      *pResult maximum value returned here
6690  * @param[out]      *pIndex index of maximum value returned here
6691  * @return none.
6692  */
6693 
6694   void arm_max_f32(
6695   float32_t * pSrc,
6696   uint32_t blockSize,
6697   float32_t * pResult,
6698   uint32_t * pIndex);
6699 
6700   /**
6701    * @brief  Q15 complex-by-complex multiplication
6702    * @param[in]  *pSrcA points to the first input vector
6703    * @param[in]  *pSrcB points to the second input vector
6704    * @param[out]  *pDst  points to the output vector
6705    * @param[in]  numSamples number of complex samples in each vector
6706    * @return none.
6707    */
6708 
6709   void arm_cmplx_mult_cmplx_q15(
6710   q15_t * pSrcA,
6711   q15_t * pSrcB,
6712   q15_t * pDst,
6713   uint32_t numSamples);
6714 
6715   /**
6716    * @brief  Q31 complex-by-complex multiplication
6717    * @param[in]  *pSrcA points to the first input vector
6718    * @param[in]  *pSrcB points to the second input vector
6719    * @param[out]  *pDst  points to the output vector
6720    * @param[in]  numSamples number of complex samples in each vector
6721    * @return none.
6722    */
6723 
6724   void arm_cmplx_mult_cmplx_q31(
6725   q31_t * pSrcA,
6726   q31_t * pSrcB,
6727   q31_t * pDst,
6728   uint32_t numSamples);
6729 
6730   /**
6731    * @brief  Floating-point complex-by-complex multiplication
6732    * @param[in]  *pSrcA points to the first input vector
6733    * @param[in]  *pSrcB points to the second input vector
6734    * @param[out]  *pDst  points to the output vector
6735    * @param[in]  numSamples number of complex samples in each vector
6736    * @return none.
6737    */
6738 
6739   void arm_cmplx_mult_cmplx_f32(
6740   float32_t * pSrcA,
6741   float32_t * pSrcB,
6742   float32_t * pDst,
6743   uint32_t numSamples);
6744 
6745   /**
6746    * @brief Converts the elements of the floating-point vector to Q31 vector.
6747    * @param[in]       *pSrc points to the floating-point input vector
6748    * @param[out]      *pDst points to the Q31 output vector
6749    * @param[in]       blockSize length of the input vector
6750    * @return none.
6751    */
6752   void arm_float_to_q31(
6753   float32_t * pSrc,
6754   q31_t * pDst,
6755   uint32_t blockSize);
6756 
6757   /**
6758    * @brief Converts the elements of the floating-point vector to Q15 vector.
6759    * @param[in]       *pSrc points to the floating-point input vector
6760    * @param[out]      *pDst points to the Q15 output vector
6761    * @param[in]       blockSize length of the input vector
6762    * @return          none
6763    */
6764   void arm_float_to_q15(
6765   float32_t * pSrc,
6766   q15_t * pDst,
6767   uint32_t blockSize);
6768 
6769   /**
6770    * @brief Converts the elements of the floating-point vector to Q7 vector.
6771    * @param[in]       *pSrc points to the floating-point input vector
6772    * @param[out]      *pDst points to the Q7 output vector
6773    * @param[in]       blockSize length of the input vector
6774    * @return          none
6775    */
6776   void arm_float_to_q7(
6777   float32_t * pSrc,
6778   q7_t * pDst,
6779   uint32_t blockSize);
6780 
6781 
6782   /**
6783    * @brief  Converts the elements of the Q31 vector to Q15 vector.
6784    * @param[in]  *pSrc is input pointer
6785    * @param[out]  *pDst is output pointer
6786    * @param[in]  blockSize is the number of samples to process
6787    * @return none.
6788    */
6789   void arm_q31_to_q15(
6790   q31_t * pSrc,
6791   q15_t * pDst,
6792   uint32_t blockSize);
6793 
6794   /**
6795    * @brief  Converts the elements of the Q31 vector to Q7 vector.
6796    * @param[in]  *pSrc is input pointer
6797    * @param[out]  *pDst is output pointer
6798    * @param[in]  blockSize is the number of samples to process
6799    * @return none.
6800    */
6801   void arm_q31_to_q7(
6802   q31_t * pSrc,
6803   q7_t * pDst,
6804   uint32_t blockSize);
6805 
6806   /**
6807    * @brief  Converts the elements of the Q15 vector to floating-point vector.
6808    * @param[in]  *pSrc is input pointer
6809    * @param[out]  *pDst is output pointer
6810    * @param[in]  blockSize is the number of samples to process
6811    * @return none.
6812    */
6813   void arm_q15_to_float(
6814   q15_t * pSrc,
6815   float32_t * pDst,
6816   uint32_t blockSize);
6817 
6818 
6819   /**
6820    * @brief  Converts the elements of the Q15 vector to Q31 vector.
6821    * @param[in]  *pSrc is input pointer
6822    * @param[out]  *pDst is output pointer
6823    * @param[in]  blockSize is the number of samples to process
6824    * @return none.
6825    */
6826   void arm_q15_to_q31(
6827   q15_t * pSrc,
6828   q31_t * pDst,
6829   uint32_t blockSize);
6830 
6831 
6832   /**
6833    * @brief  Converts the elements of the Q15 vector to Q7 vector.
6834    * @param[in]  *pSrc is input pointer
6835    * @param[out]  *pDst is output pointer
6836    * @param[in]  blockSize is the number of samples to process
6837    * @return none.
6838    */
6839   void arm_q15_to_q7(
6840   q15_t * pSrc,
6841   q7_t * pDst,
6842   uint32_t blockSize);
6843 
6844 
6845   /**
6846    * @ingroup groupInterpolation
6847    */
6848 
6849   /**
6850    * @defgroup BilinearInterpolate Bilinear Interpolation
6851    *
6852    * Bilinear interpolation is an extension of linear interpolation applied to a two dimensional grid.
6853    * The underlying function <code>f(x, y)</code> is sampled on a regular grid and the interpolation process
6854    * determines values between the grid points.
6855    * Bilinear interpolation is equivalent to two step linear interpolation, first in the x-dimension and then in the y-dimension.
6856    * Bilinear interpolation is often used in image processing to rescale images.
6857    * The CMSIS DSP library provides bilinear interpolation functions for Q7, Q15, Q31, and floating-point data types.
6858    *
6859    * <b>Algorithm</b>
6860    * \par
6861    * The instance structure used by the bilinear interpolation functions describes a two dimensional data table.
6862    * For floating-point, the instance structure is defined as:
6863    * <pre>
6864    *   typedef struct
6865    *   {
6866    *     uint16_t numRows;
6867    *     uint16_t numCols;
6868    *     float32_t *pData;
6869    * } arm_bilinear_interp_instance_f32;
6870    * </pre>
6871    *
6872    * \par
6873    * where <code>numRows</code> specifies the number of rows in the table;
6874    * <code>numCols</code> specifies the number of columns in the table;
6875    * and <code>pData</code> points to an array of size <code>numRows*numCols</code> values.
   * The data table <code>pData</code> is organized in row order and the supplied data values fall on integer indexes.
   * That is, table element (x,y) is located at <code>pData[x + y*numCols]</code> where x and y are integers.
6878    *
6879    * \par
6880    * Let <code>(x, y)</code> specify the desired interpolation point.  Then define:
6881    * <pre>
6882    *     XF = floor(x)
6883    *     YF = floor(y)
6884    * </pre>
6885    * \par
6886    * The interpolated output point is computed as:
6887    * <pre>
6888    *  f(x, y) = f(XF, YF) * (1-(x-XF)) * (1-(y-YF))
6889    *           + f(XF+1, YF) * (x-XF)*(1-(y-YF))
6890    *           + f(XF, YF+1) * (1-(x-XF))*(y-YF)
6891    *           + f(XF+1, YF+1) * (x-XF)*(y-YF)
6892    * </pre>
6893    * Note that the coordinates (x, y) contain integer and fractional components.
   * The integer components specify which portion of the table to use while the
   * fractional components control the interpolation process.
6896    *
6897    * \par
   * If (x,y) is outside of the table boundary, bilinear interpolation returns zero output.
6899    */
6900 
6901   /**
6902    * @addtogroup BilinearInterpolate
6903    * @{
6904    */
6905 
6906   /**
6907   *
6908   * @brief  Floating-point bilinear interpolation.
6909   * @param[in,out] *S points to an instance of the interpolation structure.
6910   * @param[in] X interpolation coordinate.
6911   * @param[in] Y interpolation coordinate.
6912   * @return out interpolated value.
6913   */
6914 
6915 
6916   static __INLINE float32_t arm_bilinear_interp_f32(
6917   const arm_bilinear_interp_instance_f32 * S,
6918   float32_t X,
6919   float32_t Y)
6920   {
6921     float32_t out;
6922     float32_t f00, f01, f10, f11;
6923     float32_t *pData = S->pData;
6924     int32_t xIndex, yIndex, index;
6925     float32_t xdiff, ydiff;
6926     float32_t b1, b2, b3, b4;
6927 
6928     xIndex = (int32_t) X;
6929     yIndex = (int32_t) Y;
6930 
6931     /* Care taken for table outside boundary */
6932     /* Returns zero output when values are outside table boundary */
6933     if(xIndex < 0 || xIndex > (S->numRows - 1) || yIndex < 0
6934        || yIndex > (S->numCols - 1))
6935     {
6936       return (0);
6937     }
6938 
6939     /* Calculation of index for two nearest points in X-direction */
6940     index = (xIndex - 1) + (yIndex - 1) * S->numCols;
6941 
6942 
6943     /* Read two nearest points in X-direction */
6944     f00 = pData[index];
6945     f01 = pData[index + 1];
6946 
6947     /* Calculation of index for two nearest points in Y-direction */
6948     index = (xIndex - 1) + (yIndex) * S->numCols;
6949 
6950 
6951     /* Read two nearest points in Y-direction */
6952     f10 = pData[index];
6953     f11 = pData[index + 1];
6954 
6955     /* Calculation of intermediate values */
6956     b1 = f00;
6957     b2 = f01 - f00;
6958     b3 = f10 - f00;
6959     b4 = f00 - f01 - f10 + f11;
6960 
6961     /* Calculation of fractional part in X */
6962     xdiff = X - xIndex;
6963 
6964     /* Calculation of fractional part in Y */
6965     ydiff = Y - yIndex;
6966 
6967     /* Calculation of bi-linear interpolated output */
6968     out = b1 + b2 * xdiff + b3 * ydiff + b4 * xdiff * ydiff;
6969 
6970     /* return to application */
6971     return (out);
6972 
6973   }
6974 
6975   /**
6976   *
6977   * @brief  Q31 bilinear interpolation.
6978   * @param[in,out] *S points to an instance of the interpolation structure.
6979   * @param[in] X interpolation coordinate in 12.20 format.
6980   * @param[in] Y interpolation coordinate in 12.20 format.
6981   * @return out interpolated value.
6982   */
6983 
6984   static __INLINE q31_t arm_bilinear_interp_q31(
6985   arm_bilinear_interp_instance_q31 * S,
6986   q31_t X,
6987   q31_t Y)
6988   {
6989     q31_t out;                                   /* Temporary output */
6990     q31_t acc = 0;                               /* output */
6991     q31_t xfract, yfract;                        /* X, Y fractional parts */
6992     q31_t x1, x2, y1, y2;                        /* Nearest output values */
6993     int32_t rI, cI;                              /* Row and column indices */
6994     q31_t *pYData = S->pData;                    /* pointer to output table values */
6995     uint32_t nCols = S->numCols;                 /* num of rows */
6996 
6997 
6998     /* Input is in 12.20 format */
6999     /* 12 bits for the table index */
7000     /* Index value calculation */
7001     rI = ((X & 0xFFF00000) >> 20u);
7002 
7003     /* Input is in 12.20 format */
7004     /* 12 bits for the table index */
7005     /* Index value calculation */
7006     cI = ((Y & 0xFFF00000) >> 20u);
7007 
7008     /* Care taken for table outside boundary */
7009     /* Returns zero output when values are outside table boundary */
7010     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7011     {
7012       return (0);
7013     }
7014 
7015     /* 20 bits for the fractional part */
7016     /* shift left xfract by 11 to keep 1.31 format */
7017     xfract = (X & 0x000FFFFF) << 11u;
7018 
7019     /* Read two nearest output values from the index */
7020     x1 = pYData[(rI) + nCols * (cI)];
7021     x2 = pYData[(rI) + nCols * (cI) + 1u];
7022 
7023     /* 20 bits for the fractional part */
7024     /* shift left yfract by 11 to keep 1.31 format */
7025     yfract = (Y & 0x000FFFFF) << 11u;
7026 
7027     /* Read two nearest output values from the index */
7028     y1 = pYData[(rI) + nCols * (cI + 1)];
7029     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7030 
7031     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 3.29(q29) format */
7032     out = ((q31_t) (((q63_t) x1 * (0x7FFFFFFF - xfract)) >> 32));
7033     acc = ((q31_t) (((q63_t) out * (0x7FFFFFFF - yfract)) >> 32));
7034 
7035     /* x2 * (xfract) * (1-yfract)  in 3.29(q29) and adding to acc */
7036     out = ((q31_t) ((q63_t) x2 * (0x7FFFFFFF - yfract) >> 32));
7037     acc += ((q31_t) ((q63_t) out * (xfract) >> 32));
7038 
7039     /* y1 * (1 - xfract) * (yfract)  in 3.29(q29) and adding to acc */
7040     out = ((q31_t) ((q63_t) y1 * (0x7FFFFFFF - xfract) >> 32));
7041     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7042 
7043     /* y2 * (xfract) * (yfract)  in 3.29(q29) and adding to acc */
7044     out = ((q31_t) ((q63_t) y2 * (xfract) >> 32));
7045     acc += ((q31_t) ((q63_t) out * (yfract) >> 32));
7046 
7047     /* Convert acc to 1.31(q31) format */
7048     return (acc << 2u);
7049 
7050   }
7051 
7052   /**
7053   * @brief  Q15 bilinear interpolation.
7054   * @param[in,out] *S points to an instance of the interpolation structure.
7055   * @param[in] X interpolation coordinate in 12.20 format.
7056   * @param[in] Y interpolation coordinate in 12.20 format.
7057   * @return out interpolated value.
7058   */
7059 
7060   static __INLINE q15_t arm_bilinear_interp_q15(
7061   arm_bilinear_interp_instance_q15 * S,
7062   q31_t X,
7063   q31_t Y)
7064   {
7065     q63_t acc = 0;                               /* output */
7066     q31_t out;                                   /* Temporary output */
7067     q15_t x1, x2, y1, y2;                        /* Nearest output values */
7068     q31_t xfract, yfract;                        /* X, Y fractional parts */
7069     int32_t rI, cI;                              /* Row and column indices */
7070     q15_t *pYData = S->pData;                    /* pointer to output table values */
7071     uint32_t nCols = S->numCols;                 /* num of rows */
7072 
7073     /* Input is in 12.20 format */
7074     /* 12 bits for the table index */
7075     /* Index value calculation */
7076     rI = ((X & 0xFFF00000) >> 20);
7077 
7078     /* Input is in 12.20 format */
7079     /* 12 bits for the table index */
7080     /* Index value calculation */
7081     cI = ((Y & 0xFFF00000) >> 20);
7082 
7083     /* Care taken for table outside boundary */
7084     /* Returns zero output when values are outside table boundary */
7085     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7086     {
7087       return (0);
7088     }
7089 
7090     /* 20 bits for the fractional part */
7091     /* xfract should be in 12.20 format */
7092     xfract = (X & 0x000FFFFF);
7093 
7094     /* Read two nearest output values from the index */
7095     x1 = pYData[(rI) + nCols * (cI)];
7096     x2 = pYData[(rI) + nCols * (cI) + 1u];
7097 
7098 
7099     /* 20 bits for the fractional part */
7100     /* yfract should be in 12.20 format */
7101     yfract = (Y & 0x000FFFFF);
7102 
7103     /* Read two nearest output values from the index */
7104     y1 = pYData[(rI) + nCols * (cI + 1)];
7105     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7106 
7107     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 13.51 format */
7108 
7109     /* x1 is in 1.15(q15), xfract in 12.20 format and out is in 13.35 format */
7110     /* convert 13.35 to 13.31 by right shifting  and out is in 1.31 */
7111     out = (q31_t) (((q63_t) x1 * (0xFFFFF - xfract)) >> 4u);
7112     acc = ((q63_t) out * (0xFFFFF - yfract));
7113 
7114     /* x2 * (xfract) * (1-yfract)  in 1.51 and adding to acc */
7115     out = (q31_t) (((q63_t) x2 * (0xFFFFF - yfract)) >> 4u);
7116     acc += ((q63_t) out * (xfract));
7117 
7118     /* y1 * (1 - xfract) * (yfract)  in 1.51 and adding to acc */
7119     out = (q31_t) (((q63_t) y1 * (0xFFFFF - xfract)) >> 4u);
7120     acc += ((q63_t) out * (yfract));
7121 
7122     /* y2 * (xfract) * (yfract)  in 1.51 and adding to acc */
7123     out = (q31_t) (((q63_t) y2 * (xfract)) >> 4u);
7124     acc += ((q63_t) out * (yfract));
7125 
7126     /* acc is in 13.51 format and down shift acc by 36 times */
7127     /* Convert out to 1.15 format */
7128     return (acc >> 36);
7129 
7130   }
7131 
7132   /**
7133   * @brief  Q7 bilinear interpolation.
7134   * @param[in,out] *S points to an instance of the interpolation structure.
7135   * @param[in] X interpolation coordinate in 12.20 format.
7136   * @param[in] Y interpolation coordinate in 12.20 format.
7137   * @return out interpolated value.
7138   */
7139 
7140   static __INLINE q7_t arm_bilinear_interp_q7(
7141   arm_bilinear_interp_instance_q7 * S,
7142   q31_t X,
7143   q31_t Y)
7144   {
7145     q63_t acc = 0;                               /* output */
7146     q31_t out;                                   /* Temporary output */
7147     q31_t xfract, yfract;                        /* X, Y fractional parts */
7148     q7_t x1, x2, y1, y2;                         /* Nearest output values */
7149     int32_t rI, cI;                              /* Row and column indices */
7150     q7_t *pYData = S->pData;                     /* pointer to output table values */
7151     uint32_t nCols = S->numCols;                 /* num of rows */
7152 
7153     /* Input is in 12.20 format */
7154     /* 12 bits for the table index */
7155     /* Index value calculation */
7156     rI = ((X & 0xFFF00000) >> 20);
7157 
7158     /* Input is in 12.20 format */
7159     /* 12 bits for the table index */
7160     /* Index value calculation */
7161     cI = ((Y & 0xFFF00000) >> 20);
7162 
7163     /* Care taken for table outside boundary */
7164     /* Returns zero output when values are outside table boundary */
7165     if(rI < 0 || rI > (S->numRows - 1) || cI < 0 || cI > (S->numCols - 1))
7166     {
7167       return (0);
7168     }
7169 
7170     /* 20 bits for the fractional part */
7171     /* xfract should be in 12.20 format */
7172     xfract = (X & 0x000FFFFF);
7173 
7174     /* Read two nearest output values from the index */
7175     x1 = pYData[(rI) + nCols * (cI)];
7176     x2 = pYData[(rI) + nCols * (cI) + 1u];
7177 
7178 
7179     /* 20 bits for the fractional part */
7180     /* yfract should be in 12.20 format */
7181     yfract = (Y & 0x000FFFFF);
7182 
7183     /* Read two nearest output values from the index */
7184     y1 = pYData[(rI) + nCols * (cI + 1)];
7185     y2 = pYData[(rI) + nCols * (cI + 1) + 1u];
7186 
7187     /* Calculation of x1 * (1-xfract ) * (1-yfract) and acc is in 16.47 format */
7188     out = ((x1 * (0xFFFFF - xfract)));
7189     acc = (((q63_t) out * (0xFFFFF - yfract)));
7190 
7191     /* x2 * (xfract) * (1-yfract)  in 2.22 and adding to acc */
7192     out = ((x2 * (0xFFFFF - yfract)));
7193     acc += (((q63_t) out * (xfract)));
7194 
7195     /* y1 * (1 - xfract) * (yfract)  in 2.22 and adding to acc */
7196     out = ((y1 * (0xFFFFF - xfract)));
7197     acc += (((q63_t) out * (yfract)));
7198 
7199     /* y2 * (xfract) * (yfract)  in 2.22 and adding to acc */
7200     out = ((y2 * (yfract)));
7201     acc += (((q63_t) out * (xfract)));
7202 
7203     /* acc in 16.47 format and down shift by 40 to convert to 1.7 format */
7204     return (acc >> 40);
7205 
7206   }
7207 
7208   /**
7209    * @} end of BilinearInterpolate group
7210    */
7211 
7212 
/*
 * Compiler-specific helpers.
 *
 * multAcc_32x32_keep32_R(a, x, y): a += high 32 bits of the 64-bit product x*y
 * multSub_32x32_keep32_R(a, x, y): a -= high 32 bits of the 64-bit product x*y
 * mult_32x32_keep32_R(a, x, y):    a  = high 32 bits of the 64-bit product x*y
 *
 * The Keil variants add 0x80000000 before the shift (rounding); the IAR and
 * GCC variants shift without it (truncation).  `a` must be a modifiable
 * lvalue and is evaluated more than once in the Keil accumulate/subtract
 * forms.  Every argument is parenthesized so that compound expressions such
 * as `p + q` are cast and multiplied as a whole.
 *
 * LOW_OPTIMIZATION_ENTER / LOW_OPTIMIZATION_EXIT bracket a function
 * definition that is to be built at a reduced optimization level; the
 * IAR_ONLY_ variants expand to something only under IAR.
 */
#if   defined ( __CC_ARM ) //Keil
//SMMLAR
  #define multAcc_32x32_keep32_R(a, x, y) \
  ((a) = (q31_t) (((((q63_t) (a)) << 32) + ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))

//SMMLSR
  #define multSub_32x32_keep32_R(a, x, y) \
  ((a) = (q31_t) (((((q63_t) (a)) << 32) - ((q63_t) (x) * (y)) + 0x80000000LL ) >> 32))

//SMMULR
  #define mult_32x32_keep32_R(a, x, y) \
  ((a) = (q31_t) (((q63_t) (x) * (y) + 0x80000000LL ) >> 32))

//Enter low optimization region - place directly above function definition
  #define LOW_OPTIMIZATION_ENTER \
     _Pragma ("push")         \
     _Pragma ("O1")

//Exit low optimization region - place directly after end of function definition
  #define LOW_OPTIMIZATION_EXIT \
     _Pragma ("pop")

//Enter low optimization region - place directly above function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

//Exit low optimization region - place directly after end of function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__ICCARM__) //IAR
 //SMMLA
  #define multAcc_32x32_keep32_R(a, x, y) \
  ((a) += (q31_t) (((q63_t) (x) * (y)) >> 32))

 //SMMLS
  #define multSub_32x32_keep32_R(a, x, y) \
  ((a) -= (q31_t) (((q63_t) (x) * (y)) >> 32))

//SMMUL
  #define mult_32x32_keep32_R(a, x, y) \
  ((a) = (q31_t) (((q63_t) (x) * (y)) >> 32))

//Enter low optimization region - place directly above function definition
  #define LOW_OPTIMIZATION_ENTER \
     _Pragma ("optimize=low")

//Exit low optimization region - place directly after end of function definition
  #define LOW_OPTIMIZATION_EXIT

//Enter low optimization region - place directly above function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER \
     _Pragma ("optimize=low")

//Exit low optimization region - place directly after end of function definition
  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#elif defined(__GNUC__)
 //SMMLA
  #define multAcc_32x32_keep32_R(a, x, y) \
  ((a) += (q31_t) (((q63_t) (x) * (y)) >> 32))

 //SMMLS
  #define multSub_32x32_keep32_R(a, x, y) \
  ((a) -= (q31_t) (((q63_t) (x) * (y)) >> 32))

//SMMUL
  #define mult_32x32_keep32_R(a, x, y) \
  ((a) = (q31_t) (((q63_t) (x) * (y)) >> 32))

  #define LOW_OPTIMIZATION_ENTER __attribute__(( optimize("-O1") ))

  #define LOW_OPTIMIZATION_EXIT

  #define IAR_ONLY_LOW_OPTIMIZATION_ENTER

  #define IAR_ONLY_LOW_OPTIMIZATION_EXIT

#endif
7290 
7291 
7292 
7293 
7294 
7295 #ifdef	__cplusplus
7296 }
7297 #endif
7298 
7299 
7300 #endif /* _ARM_MATH_H */
7301 
7302 
7303 /**
7304  *
7305  * End of file.
7306  */
7307