/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Copyright (c) 2013 Parrot
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/

29*a58d3d2aSXin Li #ifndef SILK_MACROS_ARMv5E_H
30*a58d3d2aSXin Li #define SILK_MACROS_ARMv5E_H
31*a58d3d2aSXin Li
/* This macro only avoids the undefined behaviour from a left shift of
   a negative value: the operand is converted to opus_uint32 before the
   shift and the result is converted back to opus_int32.
   It should only be used in macros that can't include SigProc_FIX.h.
   In other cases, use silk_LSHIFT32(). */
#define SAFE_SHL(a,b) ((opus_int32)((opus_uint32)(a) << (b)))

37*a58d3d2aSXin Li /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output have to be 32bit int */
38*a58d3d2aSXin Li #undef silk_SMULWB
silk_SMULWB_armv5e(opus_int32 a,opus_int16 b)39*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMULWB_armv5e(opus_int32 a, opus_int16 b)
40*a58d3d2aSXin Li {
41*a58d3d2aSXin Li int res;
42*a58d3d2aSXin Li __asm__(
43*a58d3d2aSXin Li "#silk_SMULWB\n\t"
44*a58d3d2aSXin Li "smulwb %0, %1, %2\n\t"
45*a58d3d2aSXin Li : "=r"(res)
46*a58d3d2aSXin Li : "r"(a), "r"(b)
47*a58d3d2aSXin Li );
48*a58d3d2aSXin Li return res;
49*a58d3d2aSXin Li }
50*a58d3d2aSXin Li #define silk_SMULWB(a, b) (silk_SMULWB_armv5e(a, b))
51*a58d3d2aSXin Li
52*a58d3d2aSXin Li /* a32 + (b32 * (opus_int32)((opus_int16)(c32))) >> 16 output have to be 32bit int */
53*a58d3d2aSXin Li #undef silk_SMLAWB
silk_SMLAWB_armv5e(opus_int32 a,opus_int32 b,opus_int16 c)54*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMLAWB_armv5e(opus_int32 a, opus_int32 b,
55*a58d3d2aSXin Li opus_int16 c)
56*a58d3d2aSXin Li {
57*a58d3d2aSXin Li int res;
58*a58d3d2aSXin Li __asm__(
59*a58d3d2aSXin Li "#silk_SMLAWB\n\t"
60*a58d3d2aSXin Li "smlawb %0, %1, %2, %3\n\t"
61*a58d3d2aSXin Li : "=r"(res)
62*a58d3d2aSXin Li : "r"(b), "r"(c), "r"(a)
63*a58d3d2aSXin Li );
64*a58d3d2aSXin Li return res;
65*a58d3d2aSXin Li }
66*a58d3d2aSXin Li #define silk_SMLAWB(a, b, c) (silk_SMLAWB_armv5e(a, b, c))
67*a58d3d2aSXin Li
68*a58d3d2aSXin Li /* (a32 * (b32 >> 16)) >> 16 */
69*a58d3d2aSXin Li #undef silk_SMULWT
silk_SMULWT_armv5e(opus_int32 a,opus_int32 b)70*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMULWT_armv5e(opus_int32 a, opus_int32 b)
71*a58d3d2aSXin Li {
72*a58d3d2aSXin Li int res;
73*a58d3d2aSXin Li __asm__(
74*a58d3d2aSXin Li "#silk_SMULWT\n\t"
75*a58d3d2aSXin Li "smulwt %0, %1, %2\n\t"
76*a58d3d2aSXin Li : "=r"(res)
77*a58d3d2aSXin Li : "r"(a), "r"(b)
78*a58d3d2aSXin Li );
79*a58d3d2aSXin Li return res;
80*a58d3d2aSXin Li }
81*a58d3d2aSXin Li #define silk_SMULWT(a, b) (silk_SMULWT_armv5e(a, b))
82*a58d3d2aSXin Li
83*a58d3d2aSXin Li /* a32 + (b32 * (c32 >> 16)) >> 16 */
84*a58d3d2aSXin Li #undef silk_SMLAWT
silk_SMLAWT_armv5e(opus_int32 a,opus_int32 b,opus_int32 c)85*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMLAWT_armv5e(opus_int32 a, opus_int32 b,
86*a58d3d2aSXin Li opus_int32 c)
87*a58d3d2aSXin Li {
88*a58d3d2aSXin Li int res;
89*a58d3d2aSXin Li __asm__(
90*a58d3d2aSXin Li "#silk_SMLAWT\n\t"
91*a58d3d2aSXin Li "smlawt %0, %1, %2, %3\n\t"
92*a58d3d2aSXin Li : "=r"(res)
93*a58d3d2aSXin Li : "r"(b), "r"(c), "r"(a)
94*a58d3d2aSXin Li );
95*a58d3d2aSXin Li return res;
96*a58d3d2aSXin Li }
97*a58d3d2aSXin Li #define silk_SMLAWT(a, b, c) (silk_SMLAWT_armv5e(a, b, c))
98*a58d3d2aSXin Li
99*a58d3d2aSXin Li /* (opus_int32)((opus_int16)(a3))) * (opus_int32)((opus_int16)(b32)) output have to be 32bit int */
100*a58d3d2aSXin Li #undef silk_SMULBB
silk_SMULBB_armv5e(opus_int32 a,opus_int32 b)101*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMULBB_armv5e(opus_int32 a, opus_int32 b)
102*a58d3d2aSXin Li {
103*a58d3d2aSXin Li int res;
104*a58d3d2aSXin Li __asm__(
105*a58d3d2aSXin Li "#silk_SMULBB\n\t"
106*a58d3d2aSXin Li "smulbb %0, %1, %2\n\t"
107*a58d3d2aSXin Li : "=r"(res)
108*a58d3d2aSXin Li : "%r"(a), "r"(b)
109*a58d3d2aSXin Li );
110*a58d3d2aSXin Li return res;
111*a58d3d2aSXin Li }
112*a58d3d2aSXin Li #define silk_SMULBB(a, b) (silk_SMULBB_armv5e(a, b))
113*a58d3d2aSXin Li
114*a58d3d2aSXin Li /* a32 + (opus_int32)((opus_int16)(b32)) * (opus_int32)((opus_int16)(c32)) output have to be 32bit int */
115*a58d3d2aSXin Li #undef silk_SMLABB
silk_SMLABB_armv5e(opus_int32 a,opus_int32 b,opus_int32 c)116*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMLABB_armv5e(opus_int32 a, opus_int32 b,
117*a58d3d2aSXin Li opus_int32 c)
118*a58d3d2aSXin Li {
119*a58d3d2aSXin Li int res;
120*a58d3d2aSXin Li __asm__(
121*a58d3d2aSXin Li "#silk_SMLABB\n\t"
122*a58d3d2aSXin Li "smlabb %0, %1, %2, %3\n\t"
123*a58d3d2aSXin Li : "=r"(res)
124*a58d3d2aSXin Li : "%r"(b), "r"(c), "r"(a)
125*a58d3d2aSXin Li );
126*a58d3d2aSXin Li return res;
127*a58d3d2aSXin Li }
128*a58d3d2aSXin Li #define silk_SMLABB(a, b, c) (silk_SMLABB_armv5e(a, b, c))
129*a58d3d2aSXin Li
130*a58d3d2aSXin Li /* (opus_int32)((opus_int16)(a32)) * (b32 >> 16) */
131*a58d3d2aSXin Li #undef silk_SMULBT
silk_SMULBT_armv5e(opus_int32 a,opus_int32 b)132*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMULBT_armv5e(opus_int32 a, opus_int32 b)
133*a58d3d2aSXin Li {
134*a58d3d2aSXin Li int res;
135*a58d3d2aSXin Li __asm__(
136*a58d3d2aSXin Li "#silk_SMULBT\n\t"
137*a58d3d2aSXin Li "smulbt %0, %1, %2\n\t"
138*a58d3d2aSXin Li : "=r"(res)
139*a58d3d2aSXin Li : "r"(a), "r"(b)
140*a58d3d2aSXin Li );
141*a58d3d2aSXin Li return res;
142*a58d3d2aSXin Li }
143*a58d3d2aSXin Li #define silk_SMULBT(a, b) (silk_SMULBT_armv5e(a, b))
144*a58d3d2aSXin Li
145*a58d3d2aSXin Li /* a32 + (opus_int32)((opus_int16)(b32)) * (c32 >> 16) */
146*a58d3d2aSXin Li #undef silk_SMLABT
silk_SMLABT_armv5e(opus_int32 a,opus_int32 b,opus_int32 c)147*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SMLABT_armv5e(opus_int32 a, opus_int32 b,
148*a58d3d2aSXin Li opus_int32 c)
149*a58d3d2aSXin Li {
150*a58d3d2aSXin Li int res;
151*a58d3d2aSXin Li __asm__(
152*a58d3d2aSXin Li "#silk_SMLABT\n\t"
153*a58d3d2aSXin Li "smlabt %0, %1, %2, %3\n\t"
154*a58d3d2aSXin Li : "=r"(res)
155*a58d3d2aSXin Li : "r"(b), "r"(c), "r"(a)
156*a58d3d2aSXin Li );
157*a58d3d2aSXin Li return res;
158*a58d3d2aSXin Li }
159*a58d3d2aSXin Li #define silk_SMLABT(a, b, c) (silk_SMLABT_armv5e(a, b, c))
160*a58d3d2aSXin Li
161*a58d3d2aSXin Li /* add/subtract with output saturated */
162*a58d3d2aSXin Li #undef silk_ADD_SAT32
silk_ADD_SAT32_armv5e(opus_int32 a,opus_int32 b)163*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_ADD_SAT32_armv5e(opus_int32 a, opus_int32 b)
164*a58d3d2aSXin Li {
165*a58d3d2aSXin Li int res;
166*a58d3d2aSXin Li __asm__(
167*a58d3d2aSXin Li "#silk_ADD_SAT32\n\t"
168*a58d3d2aSXin Li "qadd %0, %1, %2\n\t"
169*a58d3d2aSXin Li : "=r"(res)
170*a58d3d2aSXin Li : "%r"(a), "r"(b)
171*a58d3d2aSXin Li );
172*a58d3d2aSXin Li return res;
173*a58d3d2aSXin Li }
174*a58d3d2aSXin Li #define silk_ADD_SAT32(a, b) (silk_ADD_SAT32_armv5e(a, b))
175*a58d3d2aSXin Li
176*a58d3d2aSXin Li #undef silk_SUB_SAT32
silk_SUB_SAT32_armv5e(opus_int32 a,opus_int32 b)177*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_SUB_SAT32_armv5e(opus_int32 a, opus_int32 b)
178*a58d3d2aSXin Li {
179*a58d3d2aSXin Li int res;
180*a58d3d2aSXin Li __asm__(
181*a58d3d2aSXin Li "#silk_SUB_SAT32\n\t"
182*a58d3d2aSXin Li "qsub %0, %1, %2\n\t"
183*a58d3d2aSXin Li : "=r"(res)
184*a58d3d2aSXin Li : "r"(a), "r"(b)
185*a58d3d2aSXin Li );
186*a58d3d2aSXin Li return res;
187*a58d3d2aSXin Li }
188*a58d3d2aSXin Li #define silk_SUB_SAT32(a, b) (silk_SUB_SAT32_armv5e(a, b))
189*a58d3d2aSXin Li
190*a58d3d2aSXin Li #undef silk_CLZ16
silk_CLZ16_armv5(opus_int16 in16)191*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_CLZ16_armv5(opus_int16 in16)
192*a58d3d2aSXin Li {
193*a58d3d2aSXin Li int res;
194*a58d3d2aSXin Li __asm__(
195*a58d3d2aSXin Li "#silk_CLZ16\n\t"
196*a58d3d2aSXin Li "clz %0, %1;\n"
197*a58d3d2aSXin Li : "=r"(res)
198*a58d3d2aSXin Li : "r"(SAFE_SHL(in16,16)|0x8000)
199*a58d3d2aSXin Li );
200*a58d3d2aSXin Li return res;
201*a58d3d2aSXin Li }
202*a58d3d2aSXin Li #define silk_CLZ16(in16) (silk_CLZ16_armv5(in16))
203*a58d3d2aSXin Li
204*a58d3d2aSXin Li #undef silk_CLZ32
silk_CLZ32_armv5(opus_int32 in32)205*a58d3d2aSXin Li static OPUS_INLINE opus_int32 silk_CLZ32_armv5(opus_int32 in32)
206*a58d3d2aSXin Li {
207*a58d3d2aSXin Li int res;
208*a58d3d2aSXin Li __asm__(
209*a58d3d2aSXin Li "#silk_CLZ32\n\t"
210*a58d3d2aSXin Li "clz %0, %1\n\t"
211*a58d3d2aSXin Li : "=r"(res)
212*a58d3d2aSXin Li : "r"(in32)
213*a58d3d2aSXin Li );
214*a58d3d2aSXin Li return res;
215*a58d3d2aSXin Li }
216*a58d3d2aSXin Li #define silk_CLZ32(in32) (silk_CLZ32_armv5(in32))
217*a58d3d2aSXin Li
218*a58d3d2aSXin Li #undef SAFE_SHL
219*a58d3d2aSXin Li
220*a58d3d2aSXin Li #endif /* SILK_MACROS_ARMv5E_H */
221