xref: /aosp_15_r20/external/tremolo/Tremolo/asm_arm.h (revision bda690e46497e1f65c5077173b9c548e6e0cd5a1)
1*bda690e4SXin Li /************************************************************************
2*bda690e4SXin Li  * Copyright (C) 2002-2009, Xiph.org Foundation
3*bda690e4SXin Li  * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
4*bda690e4SXin Li  * All rights reserved.
5*bda690e4SXin Li  *
6*bda690e4SXin Li  * Redistribution and use in source and binary forms, with or without
7*bda690e4SXin Li  * modification, are permitted provided that the following conditions
8*bda690e4SXin Li  * are met:
9*bda690e4SXin Li  *
10*bda690e4SXin Li  *     * Redistributions of source code must retain the above copyright
11*bda690e4SXin Li  * notice, this list of conditions and the following disclaimer.
12*bda690e4SXin Li  *     * Redistributions in binary form must reproduce the above
13*bda690e4SXin Li  * copyright notice, this list of conditions and the following disclaimer
14*bda690e4SXin Li  * in the documentation and/or other materials provided with the
15*bda690e4SXin Li  * distribution.
16*bda690e4SXin Li  *     * Neither the names of the Xiph.org Foundation nor Pinknoise
17*bda690e4SXin Li  * Productions Ltd nor the names of its contributors may be used to
18*bda690e4SXin Li  * endorse or promote products derived from this software without
19*bda690e4SXin Li  * specific prior written permission.
20*bda690e4SXin Li  *
21*bda690e4SXin Li  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22*bda690e4SXin Li  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23*bda690e4SXin Li  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24*bda690e4SXin Li  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25*bda690e4SXin Li  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26*bda690e4SXin Li  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27*bda690e4SXin Li  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28*bda690e4SXin Li  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29*bda690e4SXin Li  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30*bda690e4SXin Li  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31*bda690e4SXin Li  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32*bda690e4SXin Li  ************************************************************************
33*bda690e4SXin Li 
34*bda690e4SXin Li  function: arm7 and later wide math functions
35*bda690e4SXin Li 
36*bda690e4SXin Li  ************************************************************************/
37*bda690e4SXin Li 
38*bda690e4SXin Li #ifdef _ARM_ASSEM_
39*bda690e4SXin Li 
40*bda690e4SXin Li #if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
41*bda690e4SXin Li #define _V_WIDE_MATH
42*bda690e4SXin Li 
/* 32x32->64 signed multiply, returning the HIGH 32 bits: (x*y)>>32.
 * Implemented with the ARM "smull" instruction; the low half ends up in
 * `lo` and is discarded.  The "%r" constraint lets the compiler commute
 * the operands; "=&r" (early-clobber) keeps outputs off the inputs. */
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
	       : "cc");
  return(hi);
}
51*bda690e4SXin Li 
/* Q31 fixed-point multiply: (x*y)>>31.  Takes the top 32 bits of the
 * 64-bit product (via MULT32) and shifts up one to recover the Q31
 * scaling. */
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  ogg_int32_t top_word = MULT32(x,y);
  return top_word<<1;
}
55*bda690e4SXin Li 
/* Fixed-point multiply returning the low 32 bits of (x*y)>>15, rounded.
 * smull forms the 64-bit product; "movs ... lsr #15" shifts the low word
 * and leaves the last bit shifted out (bit 14) in the carry flag; the
 * final "adc" recombines low and high halves ((lo>>15) | (hi<<17)) while
 * adding that carry, giving round-to-nearest on the discarded bits. */
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull	%0, %1, %2, %3\n\t"
	       "movs	%0, %0, lsr #15\n\t"
	       "adc	%1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
	       : "cc");
  return(hi);
}
66*bda690e4SXin Li 
/* Compiler-only memory barrier: the empty asm with a "memory" clobber
 * prevents the compiler from reordering memory accesses across this
 * point; no hardware barrier instruction is emitted. */
#define MB() asm volatile ("" : : : "memory")
68*bda690e4SXin Li 
/* Cross product, high words only:
 *   *x = (a*t + b*v) >> 32,   *y = (b*t - a*v) >> 32
 * The "rsb" negates a in-place (output %3 is tied to input a via the
 * "3" constraint) so the second smull/smlal pair computes b*t + (-a)*v.
 * MB() between the stores keeps the compiler from reordering them. */
static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
			   ogg_int32_t  t, ogg_int32_t  v,
			   ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm(	"smull	%0, %1, %4, %6\n\t"
	"smlal	%0, %1, %5, %7\n\t"
	"rsb	%3, %4, #0\n\t"
	"smull	%0, %2, %5, %6\n\t"
	"smlal	%0, %2, %3, %7"
	: "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
	: "3" (a), "r" (b), "r" (t), "r" (v)
	: "cc" );
  *x = x1;
  MB();
  *y = y1;
}
86*bda690e4SXin Li 
87*bda690e4SXin Li /* x = (a*t + b*v)>>31,    y = (b*t - a*v)>>31 */
/* Q31 cross product:  *x = (a*t + b*v)>>31,  *y = (b*t - a*v)>>31.
 * Identical asm to XPROD32 (high words of the 64-bit sums), then each
 * result is shifted left one to convert the >>32 scaling to >>31. */
static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
			   ogg_int32_t  t, ogg_int32_t  v,
			   ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm(	"smull	%0, %1, %4, %6\n\t"
	"smlal	%0, %1, %5, %7\n\t"
	"rsb	%3, %4, #0\n\t"          /* a = -a, so smlal adds (-a)*v */
	"smull	%0, %2, %5, %6\n\t"
	"smlal	%0, %2, %3, %7"
	: "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
	: "3" (a), "r" (b), "r" (t), "r" (v)
	: "cc" );
  *x = x1 << 1;
  MB();                              /* keep store order: *x before *y */
  *y = y1 << 1;
}
105*bda690e4SXin Li 
106*bda690e4SXin Li /* x = (a*t - b*v)>>31,     y = (b*t + a*v)>>31 */
/* Q31 negated cross product:
 *   *x = (a*t - b*v)>>31,   *y = (b*t + a*v)>>31
 * The leading "rsb" places -b in %2 (y1's register, reused as a temp
 * before y1 is computed) so the first smlal adds (-b)*v. */
static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
			    ogg_int32_t  t, ogg_int32_t  v,
			    ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm(	"rsb	%2, %4, #0\n\t"
	"smull	%0, %1, %3, %5\n\t"
	"smlal	%0, %1, %2, %6\n\t"
	"smull	%0, %2, %4, %5\n\t"
	"smlal	%0, %2, %3, %6"
	: "=&r" (l), "=&r" (x1), "=&r" (y1)
	: "r" (a), "r" (b), "r" (t), "r" (v)
	: "cc" );
  *x = x1 << 1;
  MB();                              /* keep store order: *x before *y */
  *y = y1 << 1;
}
124*bda690e4SXin Li 
125*bda690e4SXin Li #endif
126*bda690e4SXin Li 
127*bda690e4SXin Li #ifndef _V_CLIP_MATH
128*bda690e4SXin Li #define _V_CLIP_MATH
129*bda690e4SXin Li 
/* Clip x to the signed 16-bit range.  If x >= 32768 the result is
 * forced to 0x7fff (built as 0x7f00|0xff because ARM immediates are
 * 8-bit rotated).  If x < -32768 the result is set to #0x8000, i.e.
 * +32768 as a 32-bit value.
 * NOTE(review): 0x8000 is only -32768 after truncation to 16 bits —
 * callers presumably store the result into 16-bit samples; confirm. */
static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs	%1, %0, #32768\n\t"
	       "movpl	%0, #0x7f00\n\t"
	       "orrpl	%0, %0, #0xff\n"
	       "adds	%1, %0, #32768\n\t"
	       "movmi	%0, #0x8000"
	       : "+r"(x),"=r"(tmp)
	       :
	       : "cc");
  return(x);
}
142*bda690e4SXin Li 
143*bda690e4SXin Li #endif
144*bda690e4SXin Li 
145*bda690e4SXin Li #ifndef _V_LSP_MATH_ASM
146*bda690e4SXin Li #define _V_LSP_MATH_ASM
147*bda690e4SXin Li 
/* LSP product loop (hand-scheduled ARM version of the C loop in the
 * portable code).  Accumulates
 *   qi *= labs(ilsp[2j]   - wi)
 *   pi *= labs(ilsp[2j+1] - wi)
 * over j = m/2-1 .. 0 (pairs read back-to-front with ldmdb), shifting
 * both accumulators down 16 bits and bumping the shared exponent qexp
 * whenever either 64-bit product overflows 32 bits.  Handles the extra
 * term for odd m, then renormalizes both accumulators to at most 16
 * significant bits.  Uses r0-r3 as scratch (declared as clobbers). */
static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
				ogg_int32_t *qexpp,
				ogg_int32_t *ilsp,ogg_int32_t wi,
				ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  asm("mov     r0,%3;"               // r0 = ilsp
      "mov     r1,%5,asr#1;"         // r1 = m/2
      "add     r0,r0,r1,lsl#3;"      // r0 = &ilsp[2*(m/2)] (end of pairs)
      "1:"

      "ldmdb   r0!,{r1,r3};"         // load ilsp[j], ilsp[j+1]; step back
      "subs    r1,r1,%4;"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;"       //qi*=labs(ilsp[j]-wi)

      "subs    r1,r3,%4;"          //ilsp[j+1]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j+1]-wi)
      "umull   %1,r3,r1,%1;"       //pi*=labs(ilsp[j+1]-wi)

      "cmn     r2,r3;"             // shift down 16?  (both high words 0
      "beq     0f;"                //  => r2+r3 == 0 => no shift needed)
      "add     %2,%2,#16;"
      "mov     %0,%0,lsr #16;"
      "orr     %0,%0,r2,lsl #16;"    // qi = 64-bit product >> 16
      "mov     %1,%1,lsr #16;"
      "orr     %1,%1,r3,lsl #16;"    // pi = 64-bit product >> 16
      "0:"
      "cmp     r0,%3;\n"             // loop until back at &ilsp[0]
      "bhi     1b;\n"

      // odd filter asymmetry
      "ands    r0,%5,#1;\n"          // m odd? (skip extra term if not)
      "beq     2f;\n"
      "add     r0,%3,%5,lsl#2;\n"    // r0 = &ilsp[m]

      "ldr     r1,[r0,#-4];\n"       // r1 = ilsp[m-1]
      "mov     r0,#0x4000;\n"        // pi's extra factor is 0x4000

      "subs    r1,r1,%4;\n"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;\n"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;\n"       //qi*=labs(ilsp[j]-wi)
      "umull   %1,r3,r0,%1;\n"       //pi*=0x4000

      "cmn     r2,r3;\n"             // shift down 16?
      "beq     2f;\n"
      "add     %2,%2,#16;\n"
      "mov     %0,%0,lsr #16;\n"
      "orr     %0,%0,r2,lsl #16;\n"
      "mov     %1,%1,lsr #16;\n"
      "orr     %1,%1,r3,lsl #16;\n"

      //qi=(pi>>shift)*labs(ilsp[j]-wi);
      //pi=(qi>>shift)*labs(ilsp[j+1]-wi);
      //qexp+=shift;

      //}

      /* normalize to max 16 sig figs: count bits above bit 15 in
         (qi|pi) into r2, shift both down by r2, add r2 to qexp */
      "2:"
      "mov     r2,#0;"
      "orr     r1,%0,%1;"
      "tst     r1,#0xff000000;"
      "addne   r2,r2,#8;"
      "movne   r1,r1,lsr #8;"
      "tst     r1,#0x00f00000;"
      "addne   r2,r2,#4;"
      "movne   r1,r1,lsr #4;"
      "tst     r1,#0x000c0000;"
      "addne   r2,r2,#2;"
      "movne   r1,r1,lsr #2;"
      "tst     r1,#0x00020000;"
      "addne   r2,r2,#1;"
      "movne   r1,r1,lsr #1;"
      "tst     r1,#0x00010000;"
      "addne   r2,r2,#1;"
      "mov     %0,%0,lsr r2;"
      "mov     %1,%1,lsr r2;"
      "add     %2,%2,r2;"

      : "+r"(qi),"+r"(pi),"+r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}
238*bda690e4SXin Li 
/* Normalize qi left until bit 15 is set, decrementing the exponent qexp
 * by the total shift (binary-search style: 8, 4, 2, 1 bit steps).
 * Assumes qi already fits in the low 16 bits (qi < 0x10000) — which the
 * renormalization at the end of lsp_loop_asm guarantees; a larger qi
 * would be shifted left anyway since only low-half bits are tested. */
static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  asm("tst     %0,#0x0000ff00;"
      "moveq   %0,%0,lsl #8;"
      "subeq   %1,%1,#8;"
      "tst     %0,#0x0000f000;"
      "moveq   %0,%0,lsl #4;"
      "subeq   %1,%1,#4;"
      "tst     %0,#0x0000c000;"
      "moveq   %0,%0,lsl #2;"
      "subeq   %1,%1,#2;"
      "tst     %0,#0x00008000;"
      "moveq   %0,%0,lsl #1;"
      "subeq   %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}
262*bda690e4SXin Li 
263*bda690e4SXin Li #endif
264*bda690e4SXin Li #endif
265*bda690e4SXin Li 
266