1*1295d682SXin Li /* Copyright (c) 2003-2008 Jean-Marc Valin
2*1295d682SXin Li Copyright (c) 2007-2008 CSIRO
3*1295d682SXin Li Copyright (c) 2007-2009 Xiph.Org Foundation
4*1295d682SXin Li Written by Jean-Marc Valin */
5*1295d682SXin Li /**
6*1295d682SXin Li @file arch.h
7*1295d682SXin Li @brief Various architecture definitions for CELT
8*1295d682SXin Li */
9*1295d682SXin Li /*
10*1295d682SXin Li Redistribution and use in source and binary forms, with or without
11*1295d682SXin Li modification, are permitted provided that the following conditions
12*1295d682SXin Li are met:
13*1295d682SXin Li
14*1295d682SXin Li - Redistributions of source code must retain the above copyright
15*1295d682SXin Li notice, this list of conditions and the following disclaimer.
16*1295d682SXin Li
17*1295d682SXin Li - Redistributions in binary form must reproduce the above copyright
18*1295d682SXin Li notice, this list of conditions and the following disclaimer in the
19*1295d682SXin Li documentation and/or other materials provided with the distribution.
20*1295d682SXin Li
21*1295d682SXin Li THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22*1295d682SXin Li ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23*1295d682SXin Li LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24*1295d682SXin Li A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
25*1295d682SXin Li OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26*1295d682SXin Li EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27*1295d682SXin Li PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28*1295d682SXin Li PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29*1295d682SXin Li LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30*1295d682SXin Li NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31*1295d682SXin Li SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*1295d682SXin Li */
33*1295d682SXin Li
34*1295d682SXin Li #ifndef ARCH_H
35*1295d682SXin Li #define ARCH_H
36*1295d682SXin Li
37*1295d682SXin Li #include "opus_types.h"
38*1295d682SXin Li #include "common.h"
39*1295d682SXin Li
/* Fallback for __GNUC_PREREQ(maj,min), used only when nothing has defined it
   already. When both __GNUC__ and __GNUC_MINOR__ are available it compares the
   packed compiler version (major<<16 + minor) against the requested one;
   otherwise it always evaluates to 0. */
40*1295d682SXin Li # if !defined(__GNUC_PREREQ)
41*1295d682SXin Li # if defined(__GNUC__)&&defined(__GNUC_MINOR__)
42*1295d682SXin Li # define __GNUC_PREREQ(_maj,_min) \
43*1295d682SXin Li ((__GNUC__<<16)+__GNUC_MINOR__>=((_maj)<<16)+(_min))
44*1295d682SXin Li # else
45*1295d682SXin Li # define __GNUC_PREREQ(_maj,_min) 0
46*1295d682SXin Li # endif
47*1295d682SXin Li # endif
48*1295d682SXin Li
49*1295d682SXin Li #define CELT_SIG_SCALE 32768.f
50*1295d682SXin Li
/* Report a fatal internal error at the call site: forwards str to
   _celt_fatal() together with __FILE__ and __LINE__. The expansion already
   ends with a semicolon.
   NOTE(review): this macro is defined unconditionally, but _celt_fatal() is
   only defined when ENABLE_ASSERTIONS is set -- confirm nothing expands
   celt_fatal() in builds without assertions. */
51*1295d682SXin Li #define celt_fatal(str) _celt_fatal(str, __FILE__, __LINE__);
52*1295d682SXin Li #ifdef ENABLE_ASSERTIONS
53*1295d682SXin Li #include <stdio.h>
54*1295d682SXin Li #include <stdlib.h>
55*1295d682SXin Li #ifdef __GNUC__
56*1295d682SXin Li __attribute__((noreturn))
57*1295d682SXin Li #endif
_celt_fatal(const char * str,const char * file,int line)58*1295d682SXin Li static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
59*1295d682SXin Li {
60*1295d682SXin Li fprintf (stderr, "Fatal (internal) error in %s, line %d: %s\n", file, line, str);
61*1295d682SXin Li abort();
62*1295d682SXin Li }
/* Assertions enabled: when cond is false, celt_assert() passes the text
   "assertion failed: " followed by the stringified condition to celt_fatal();
   celt_assert2() additionally appends message on a new line. Both expand to a
   brace-enclosed block, not an expression. */
63*1295d682SXin Li #define celt_assert(cond) {if (!(cond)) {celt_fatal("assertion failed: " #cond);}}
64*1295d682SXin Li #define celt_assert2(cond, message) {if (!(cond)) {celt_fatal("assertion failed: " #cond "\n" message);}}
65*1295d682SXin Li #else
/* Assertions disabled: both macros expand to nothing, so cond and message are
   never evaluated. */
66*1295d682SXin Li #define celt_assert(cond)
67*1295d682SXin Li #define celt_assert2(cond, message)
68*1295d682SXin Li #endif
69*1295d682SXin Li
/* Plain product of the two arguments. */
70*1295d682SXin Li #define IMUL32(a,b) ((a)*(b))
71*1295d682SXin Li
/* Two-argument minimum/maximum helpers. All six share the same expansion
   pattern; each argument appears twice in it (once in the comparison, once in
   the selected result), so an argument with side effects may be evaluated more
   than once. */
72*1295d682SXin Li #define MIN16(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 16-bit value. */
73*1295d682SXin Li #define MAX16(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 16-bit value. */
74*1295d682SXin Li #define MIN32(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum 32-bit value. */
75*1295d682SXin Li #define MAX32(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum 32-bit value. */
76*1295d682SXin Li #define IMIN(a,b) ((a) < (b) ? (a) : (b)) /**< Minimum int value. */
77*1295d682SXin Li #define IMAX(a,b) ((a) > (b) ? (a) : (b)) /**< Maximum int value. */
/* Plain sum and difference of the two arguments; no casts are applied here, so
   the arithmetic happens in whatever type the operands have. */
78*1295d682SXin Li #define UADD32(a,b) ((a)+(b))
79*1295d682SXin Li #define USUB32(a,b) ((a)-(b))
80*1295d682SXin Li
81*1295d682SXin Li /* Set this if opus_int64 is a native type of the CPU. */
82*1295d682SXin Li /* Assume that all LP64 architectures have fast 64-bit types; also x86_64
83*1295d682SXin Li (which can be ILP32 for x32) and Win64 (which is LLP64). */
/* OPUS_FAST_INT64 is 1 when at least one of the three target macros tested
   here is defined, and 0 otherwise. */
84*1295d682SXin Li #if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
85*1295d682SXin Li #define OPUS_FAST_INT64 1
86*1295d682SXin Li #else
87*1295d682SXin Li #define OPUS_FAST_INT64 0
88*1295d682SXin Li #endif
89*1295d682SXin Li
/* Expands to nothing; the file argument is discarded. */
90*1295d682SXin Li #define PRINT_MIPS(file)
91*1295d682SXin Li
92*1295d682SXin Li #ifdef FIXED_POINT
93*1295d682SXin Li
/* Fixed-point build: the generic value types map onto the 16-, 32- and 64-bit
   integer types. */
94*1295d682SXin Li typedef opus_int16 opus_val16;
95*1295d682SXin Li typedef opus_int32 opus_val32;
96*1295d682SXin Li typedef opus_int64 opus_val64;
97*1295d682SXin Li
/* Signal and energy values use the 32-bit type, normalised values the 16-bit
   type. */
98*1295d682SXin Li typedef opus_val32 celt_sig;
99*1295d682SXin Li typedef opus_val16 celt_norm;
100*1295d682SXin Li typedef opus_val32 celt_ener;
101*1295d682SXin Li
/* NOTE(review): 32767 is the largest positive 16-bit value; the name suggests
   it stands for 1.0 in Q15 -- confirm against users of this constant. */
102*1295d682SXin Li #define Q15ONE 32767
103*1295d682SXin Li
104*1295d682SXin Li #define SIG_SHIFT 12
105*1295d682SXin Li /* Safe saturation value for 32-bit signals. Should be less than
106*1295d682SXin Li 2^31*(1-0.85) to avoid blowing up on DC at deemphasis.*/
107*1295d682SXin Li #define SIG_SAT (300000000)
108*1295d682SXin Li
/* 16384 == 1<<14. */
109*1295d682SXin Li #define NORM_SCALING 16384
110*1295d682SXin Li
111*1295d682SXin Li #define DB_SHIFT 10
112*1295d682SXin Li
/* Integer stand-ins for the small/large limit constants; Q15_ONE is the same
   value as Q15ONE but carries an explicit opus_val16 cast. */
113*1295d682SXin Li #define EPSILON 1
114*1295d682SXin Li #define VERY_SMALL 0
115*1295d682SXin Li #define VERY_LARGE16 ((opus_val16)32767)
116*1295d682SXin Li #define Q15_ONE ((opus_val16)32767)
117*1295d682SXin Li
/* In the fixed-point build input/output scaling is the identity. */
118*1295d682SXin Li #define SCALEIN(a) (a)
119*1295d682SXin Li #define SCALEOUT(a) (a)
120*1295d682SXin Li
/* Absolute value by negating negative inputs. x appears more than once in the
   expansion, so avoid arguments with side effects. */
121*1295d682SXin Li #define ABS16(x) ((x) < 0 ? (-(x)) : (x))
122*1295d682SXin Li #define ABS32(x) ((x) < 0 ? (-(x)) : (x))
123*1295d682SXin Li
SAT16(opus_int32 x)124*1295d682SXin Li static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
125*1295d682SXin Li return x > 32767 ? 32767 : x < -32768 ? -32768 : (opus_int16)x;
126*1295d682SXin Li }
127*1295d682SXin Li
128*1295d682SXin Li #ifdef FIXED_DEBUG
129*1295d682SXin Li #include "fixed_debug.h"
130*1295d682SXin Li #else
131*1295d682SXin Li
132*1295d682SXin Li #include "fixed_generic.h"
133*1295d682SXin Li
134*1295d682SXin Li #ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
135*1295d682SXin Li #include "arm/fixed_arm64.h"
136*1295d682SXin Li #elif OPUS_ARM_INLINE_EDSP
137*1295d682SXin Li #include "arm/fixed_armv5e.h"
138*1295d682SXin Li #elif defined (OPUS_ARM_INLINE_ASM)
139*1295d682SXin Li #include "arm/fixed_armv4.h"
140*1295d682SXin Li #elif defined (BFIN_ASM)
141*1295d682SXin Li #include "fixed_bfin.h"
142*1295d682SXin Li #elif defined (TI_C5X_ASM)
143*1295d682SXin Li #include "fixed_c5x.h"
144*1295d682SXin Li #elif defined (TI_C6X_ASM)
145*1295d682SXin Li #include "fixed_c6x.h"
146*1295d682SXin Li #endif
147*1295d682SXin Li
148*1295d682SXin Li #endif
149*1295d682SXin Li
150*1295d682SXin Li #else /* FIXED_POINT */
151*1295d682SXin Li
/* Floating-point build: every generic value type, whatever its nominal width,
   is plain float. */
152*1295d682SXin Li typedef float opus_val16;
153*1295d682SXin Li typedef float opus_val32;
154*1295d682SXin Li typedef float opus_val64;
155*1295d682SXin Li
/* Signal, normalised and energy values are likewise float. */
156*1295d682SXin Li typedef float celt_sig;
157*1295d682SXin Li typedef float celt_norm;
158*1295d682SXin Li typedef float celt_ener;
159*1295d682SXin Li
160*1295d682SXin Li #ifdef FLOAT_APPROX
161*1295d682SXin Li /* This code should reliably detect NaN/inf even when -ffast-math is used.
162*1295d682SXin Li Assumes IEEE 754 format. */
celt_isnan(float x)163*1295d682SXin Li static OPUS_INLINE int celt_isnan(float x)
164*1295d682SXin Li {
165*1295d682SXin Li union {float f; opus_uint32 i;} in;
166*1295d682SXin Li in.f = x;
167*1295d682SXin Li return ((in.i>>23)&0xFF)==0xFF && (in.i&0x007FFFFF)!=0;
168*1295d682SXin Li }
169*1295d682SXin Li #else
/* Without FLOAT_APPROX the NaN test below is a floating-point self-comparison,
   and builds that define __FAST_MATH__ are rejected at compile time. */
170*1295d682SXin Li #ifdef __FAST_MATH__
171*1295d682SXin Li #error Cannot build libopus with -ffast-math unless FLOAT_APPROX is defined. This could result in crashes on extreme (e.g. NaN) input
172*1295d682SXin Li #endif
/* True when x compares unequal to itself. x is expanded twice. */
173*1295d682SXin Li #define celt_isnan(x) ((x)!=(x))
174*1295d682SXin Li #endif
175*1295d682SXin Li
/* Floating-point definitions of the constants that the FIXED_POINT branch
   defines as integers: unity is 1.0 and the normalisation scale is 1. */
176*1295d682SXin Li #define Q15ONE 1.0f
177*1295d682SXin Li
178*1295d682SXin Li #define NORM_SCALING 1.f
179*1295d682SXin Li
180*1295d682SXin Li #define EPSILON 1e-15f
181*1295d682SXin Li #define VERY_SMALL 1e-30f
182*1295d682SXin Li #define VERY_LARGE16 1e15f
183*1295d682SXin Li #define Q15_ONE ((opus_val16)1.f)
184*1295d682SXin Li
185*1295d682SXin Li /* This appears to be the same speed as C99's fabsf() but it's more portable. */
/* NOTE(review): fabs() is declared in <math.h>, which this header does not
   include directly -- presumably it comes from common.h or the including
   file; confirm. */
186*1295d682SXin Li #define ABS16(x) ((float)fabs(x))
187*1295d682SXin Li #define ABS32(x) ((float)fabs(x))
188*1295d682SXin Li
/* Floating-point versions of the fixed-point arithmetic macros.
   - Constant, extract/extend, shift, rounding and saturation macros all reduce
     to their first operand; the bits/shift/limit argument is discarded.
   - Negation, halving, add/sub, multiply, multiply-accumulate and divide map
     directly onto the corresponding float operators.
   Arguments are expanded textually, exactly once each. */
189*1295d682SXin Li #define QCONST16(x,bits) (x)
190*1295d682SXin Li #define QCONST32(x,bits) (x)
191*1295d682SXin Li
192*1295d682SXin Li #define NEG16(x) (-(x))
193*1295d682SXin Li #define NEG32(x) (-(x))
194*1295d682SXin Li #define NEG32_ovflw(x) (-(x))
195*1295d682SXin Li #define EXTRACT16(x) (x)
196*1295d682SXin Li #define EXTEND32(x) (x)
197*1295d682SXin Li #define SHR16(a,shift) (a)
198*1295d682SXin Li #define SHL16(a,shift) (a)
199*1295d682SXin Li #define SHR32(a,shift) (a)
200*1295d682SXin Li #define SHL32(a,shift) (a)
201*1295d682SXin Li #define PSHR32(a,shift) (a)
202*1295d682SXin Li #define VSHR32(a,shift) (a)
203*1295d682SXin Li
204*1295d682SXin Li #define PSHR(a,shift) (a)
205*1295d682SXin Li #define SHR(a,shift) (a)
206*1295d682SXin Li #define SHL(a,shift) (a)
207*1295d682SXin Li #define SATURATE(x,a) (x)
208*1295d682SXin Li #define SATURATE16(x) (x)
209*1295d682SXin Li
210*1295d682SXin Li #define ROUND16(a,shift) (a)
211*1295d682SXin Li #define SROUND16(a,shift) (a)
212*1295d682SXin Li #define HALF16(x) (.5f*(x))
213*1295d682SXin Li #define HALF32(x) (.5f*(x))
214*1295d682SXin Li
215*1295d682SXin Li #define ADD16(a,b) ((a)+(b))
216*1295d682SXin Li #define SUB16(a,b) ((a)-(b))
217*1295d682SXin Li #define ADD32(a,b) ((a)+(b))
218*1295d682SXin Li #define SUB32(a,b) ((a)-(b))
219*1295d682SXin Li #define ADD32_ovflw(a,b) ((a)+(b))
220*1295d682SXin Li #define SUB32_ovflw(a,b) ((a)-(b))
221*1295d682SXin Li #define MULT16_16_16(a,b) ((a)*(b))
/* These two cast both operands to opus_val32 before multiplying. */
222*1295d682SXin Li #define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
223*1295d682SXin Li #define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
224*1295d682SXin Li
225*1295d682SXin Li #define MULT16_32_Q15(a,b) ((a)*(b))
226*1295d682SXin Li #define MULT16_32_Q16(a,b) ((a)*(b))
227*1295d682SXin Li
228*1295d682SXin Li #define MULT32_32_Q31(a,b) ((a)*(b))
229*1295d682SXin Li
230*1295d682SXin Li #define MAC16_32_Q15(c,a,b) ((c)+(a)*(b))
231*1295d682SXin Li #define MAC16_32_Q16(c,a,b) ((c)+(a)*(b))
232*1295d682SXin Li
233*1295d682SXin Li #define MULT16_16_Q11_32(a,b) ((a)*(b))
234*1295d682SXin Li #define MULT16_16_Q11(a,b) ((a)*(b))
235*1295d682SXin Li #define MULT16_16_Q13(a,b) ((a)*(b))
236*1295d682SXin Li #define MULT16_16_Q14(a,b) ((a)*(b))
237*1295d682SXin Li #define MULT16_16_Q15(a,b) ((a)*(b))
238*1295d682SXin Li #define MULT16_16_P15(a,b) ((a)*(b))
239*1295d682SXin Li #define MULT16_16_P13(a,b) ((a)*(b))
240*1295d682SXin Li #define MULT16_16_P14(a,b) ((a)*(b))
241*1295d682SXin Li #define MULT16_32_P16(a,b) ((a)*(b))
242*1295d682SXin Li
/* Division casts the numerator to opus_val32 and the denominator to the type
   named in the macro. */
243*1295d682SXin Li #define DIV32_16(a,b) (((opus_val32)(a))/(opus_val16)(b))
244*1295d682SXin Li #define DIV32(a,b) (((opus_val32)(a))/(opus_val32)(b))
245*1295d682SXin Li
/* Input is multiplied by CELT_SIG_SCALE and output by its reciprocal. */
246*1295d682SXin Li #define SCALEIN(a) ((a)*CELT_SIG_SCALE)
247*1295d682SXin Li #define SCALEOUT(a) ((a)*(1/CELT_SIG_SCALE))
248*1295d682SXin Li
/* Identity in the floating-point build. */
249*1295d682SXin Li #define SIG2WORD16(x) (x)
250*1295d682SXin Li
251*1295d682SXin Li #endif /* !FIXED_POINT */
252*1295d682SXin Li
/* Default value of GLOBAL_STACK_SIZE, used unless the build has already
   defined it. The fixed-point and floating-point builds use the same value,
   so no FIXED_POINT distinction is needed. */
#ifndef GLOBAL_STACK_SIZE
#define GLOBAL_STACK_SIZE 120000
#endif
260*1295d682SXin Li
261*1295d682SXin Li #endif /* ARCH_H */
262