/************************************************************************
 * Copyright (C) 2002-2009, Xiph.org Foundation
 * Copyright (C) 2010, Robin Watts for Pinknoise Productions Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the names of the Xiph.org Foundation nor Pinknoise
 * Productions Ltd nor the names of its contributors may be used to
 * endorse or promote products derived from this software without
 * specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ************************************************************************

 function: arm7 and later wide math functions

 ************************************************************************/

#ifdef _ARM_ASSEM_

#if !defined(_V_WIDE_MATH) && !defined(_LOW_ACCURACY_)
#define _V_WIDE_MATH

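/* MULT32: the high 32 bits of the 64-bit signed product, i.e. (x*y)>>32.
   A rough portable sketch of what the smull computes (illustration only,
   assuming a 64-bit ogg_int64_t type is available):

     (ogg_int32_t)(((ogg_int64_t)x * (ogg_int64_t)y) >> 32)
*/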
static inline ogg_int32_t MULT32(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull\t%0, %1, %2, %3"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}

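/* MULT31: fractional Q31 multiply, nominally (x*y)>>31; computed as the
   >>32 high word doubled, so the lowest result bit is dropped. */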
static inline ogg_int32_t MULT31(ogg_int32_t x, ogg_int32_t y) {
  return MULT32(x,y)<<1;
}

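/* MULT31_SHIFT15: (x*y)>>15, rounded to nearest: the movs leaves bit 14
   of the low word in the carry flag, and the adc folds that carry back
   in while recombining the two halves.  Roughly, as a portable sketch
   (illustration only, assuming ogg_int64_t):

     (ogg_int32_t)(((ogg_int64_t)x * y + (1 << 14)) >> 15)
*/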
static inline ogg_int32_t MULT31_SHIFT15(ogg_int32_t x, ogg_int32_t y) {
  int lo,hi;
  asm volatile("smull %0, %1, %2, %3\n\t"
               "movs  %0, %0, lsr #15\n\t"
               "adc   %1, %0, %1, lsl #17\n\t"
               : "=&r"(lo),"=&r"(hi)
               : "%r"(x),"r"(y)
               : "cc");
  return(hi);
}

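/* MB(): a compiler-level memory barrier.  The empty asm with a "memory"
   clobber emits no instructions; it only stops the compiler from caching
   or reordering memory accesses across it (it is not a hardware barrier). */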
#define MB() asm volatile ("" : : : "memory")

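/* x = (a*t + b*v)>>32, y = (b*t - a*v)>>32 */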
static inline void XPROD32(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %4, %6\n\t"
       "smlal %0, %1, %5, %7\n\t"
       "rsb   %3, %4, #0\n\t"
       "smull %0, %2, %5, %6\n\t"
       "smlal %0, %2, %3, %7"
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1;
  MB();
  *y = y1;
}

/* x = (a*t + b*v)>>31, y = (b*t - a*v)>>31 */
static inline void XPROD31(ogg_int32_t  a, ogg_int32_t  b,
                           ogg_int32_t  t, ogg_int32_t  v,
                           ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "smull %0, %1, %4, %6\n\t"
       "smlal %0, %1, %5, %7\n\t"
       "rsb   %3, %4, #0\n\t"
       "smull %0, %2, %5, %6\n\t"
       "smlal %0, %2, %3, %7"
       : "=&r" (l), "=&r" (x1), "=&r" (y1), "=r" (a)
       : "3" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}

/* x = (a*t - b*v)>>31, y = (b*t + a*v)>>31 */
static inline void XNPROD31(ogg_int32_t  a, ogg_int32_t  b,
                            ogg_int32_t  t, ogg_int32_t  v,
                            ogg_int32_t *x, ogg_int32_t *y)
{
  int x1, y1, l;
  asm( "rsb   %2, %4, #0\n\t"
       "smull %0, %1, %3, %5\n\t"
       "smlal %0, %1, %2, %6\n\t"
       "smull %0, %2, %4, %5\n\t"
       "smlal %0, %2, %3, %6"
       : "=&r" (l), "=&r" (x1), "=&r" (y1)
       : "r" (a), "r" (b), "r" (t), "r" (v)
       : "cc" );
  *x = x1 << 1;
  MB();
  *y = y1 << 1;
}
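
/* For reference, the contract of XPROD31 in portable C (illustration
   only, assuming ogg_int64_t; the asm actually computes ((...)>>32)<<1,
   i.e. the >>31 result with its lowest bit cleared, and XNPROD31 is the
   same with the signs of the v terms swapped):

     *x = (ogg_int32_t)(((ogg_int64_t)a*t + (ogg_int64_t)b*v) >> 31);
     *y = (ogg_int32_t)(((ogg_int64_t)b*t - (ogg_int64_t)a*v) >> 31);
*/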

#endif

#ifndef _V_CLIP_MATH
#define _V_CLIP_MATH

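/* CLIP_TO_15: clamp x to the signed 16-bit range, i.e. the intent of

     if (x >  32767) x =  32767;
     if (x < -32768) x = -32768;

   Note the negative clamp loads #0x8000 (+32768 as a 32-bit value); this
   appears to rely on callers truncating the result to 16 bits, where
   0x8000 reads back as -32768. */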
static inline ogg_int32_t CLIP_TO_15(ogg_int32_t x) {
  int tmp;
  asm volatile("subs  %1, %0, #32768\n\t"   /* x - 32768 >= 0, i.e. x > 32767? */
               "movpl %0, #0x7f00\n\t"      /* clamp high: build 0x7fff */
               "orrpl %0, %0, #0xff\n"
               "adds  %1, %0, #32768\n\t"   /* x + 32768 < 0, i.e. x < -32768? */
               "movmi %0, #0x8000"          /* clamp low (see note above) */
               : "+r"(x),"=r"(tmp)
               :
               : "cc");
  return(x);
}

#endif

#ifndef _V_LSP_MATH_ASM
#define _V_LSP_MATH_ASM

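/* lsp_loop_asm: walks ilsp[] in pairs, accumulating
   qi *= labs(ilsp[j]-wi) and pi *= labs(ilsp[j+1]-wi), and renormalises
   both down by 16 bits (bumping qexp) whenever either product spills
   into its high word.  A rough portable sketch of the main loop
   (illustration only, assuming a 64-bit ogg_uint64_t; the asm also
   handles the odd-order tail, where pi is scaled by 0x4000, and a final
   normalisation):

     ogg_int32_t j;
     for (j = 0; j + 1 < m; j += 2) {
       ogg_uint64_t q = (ogg_uint64_t)qi * labs(ilsp[j]   - wi);
       ogg_uint64_t p = (ogg_uint64_t)pi * labs(ilsp[j+1] - wi);
       if ((q >> 32) | (p >> 32)) { q >>= 16; p >>= 16; qexp += 16; }
       qi = (ogg_uint32_t)q; pi = (ogg_uint32_t)p;
     }
*/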
static inline void lsp_loop_asm(ogg_uint32_t *qip,ogg_uint32_t *pip,
                                ogg_int32_t *qexpp,
                                ogg_int32_t *ilsp,ogg_int32_t wi,
                                ogg_int32_t m){

  ogg_uint32_t qi=*qip,pi=*pip;
  ogg_int32_t qexp=*qexpp;

  asm("mov     r0,%3;"
      "mov     r1,%5,asr#1;"
      "add     r0,r0,r1,lsl#3;"
      "1:"

      "ldmdb   r0!,{r1,r3};"
      "subs    r1,r1,%4;"          //ilsp[j]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;"       //qi*=labs(ilsp[j]-wi)

      "subs    r1,r3,%4;"          //ilsp[j+1]-wi
      "rsbmi   r1,r1,#0;"          //labs(ilsp[j+1]-wi)
      "umull   %1,r3,r1,%1;"       //pi*=labs(ilsp[j+1]-wi)

      "cmn     r2,r3;"             // shift down 16?
      "beq     0f;"
      "add     %2,%2,#16;"
      "mov     %0,%0,lsr #16;"
      "orr     %0,%0,r2,lsl #16;"
      "mov     %1,%1,lsr #16;"
      "orr     %1,%1,r3,lsl #16;"
      "0:"
      "cmp     r0,%3;\n"
      "bhi     1b;\n"
      // odd filter asymmetry
      "ands    r0,%5,#1;\n"
      "beq     2f;\n"
      "add     r0,%3,%5,lsl#2;\n"

      "ldr     r1,[r0,#-4];\n"
      "mov     r0,#0x4000;\n"

      "subs    r1,r1,%4;\n"        //ilsp[j]-wi
      "rsbmi   r1,r1,#0;\n"        //labs(ilsp[j]-wi)
      "umull   %0,r2,r1,%0;\n"     //qi*=labs(ilsp[j]-wi)
      "umull   %1,r3,r0,%1;\n"     //pi*=0x4000 (r0), the odd-tap constant
      "cmn     r2,r3;\n"           // shift down 16?
      "beq     2f;\n"
      "add     %2,%2,#16;\n"
      "mov     %0,%0,lsr #16;\n"
      "orr     %0,%0,r2,lsl #16;\n"
      "mov     %1,%1,lsr #16;\n"
      "orr     %1,%1,r3,lsl #16;\n"

      //qi=(qi>>shift)*labs(ilsp[j]-wi);
      //pi=(pi>>shift)*labs(ilsp[j+1]-wi);
      //qexp+=shift;

      //}
      /* normalize to max 16 sig figs */
      "2:"
      "mov     r2,#0;"
      "orr     r1,%0,%1;"
      "tst     r1,#0xff000000;"
      "addne   r2,r2,#8;"
      "movne   r1,r1,lsr #8;"
      "tst     r1,#0x00f00000;"
      "addne   r2,r2,#4;"
      "movne   r1,r1,lsr #4;"
      "tst     r1,#0x000c0000;"
      "addne   r2,r2,#2;"
      "movne   r1,r1,lsr #2;"
      "tst     r1,#0x00020000;"
      "addne   r2,r2,#1;"
      "movne   r1,r1,lsr #1;"
      "tst     r1,#0x00010000;"
      "addne   r2,r2,#1;"
      "mov     %0,%0,lsr r2;"
      "mov     %1,%1,lsr r2;"
      "add     %2,%2,r2;"

      : "+r"(qi),"+r"(pi),"+r"(qexp)
      : "r"(ilsp),"r"(wi),"r"(m)
      : "r0","r1","r2","r3","cc");

  *qip=qi;
  *pip=pi;
  *qexpp=qexp;
}

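/* lsp_norm_asm: four-step binary normalisation that shifts qi left until
   bit 15 is set, decrementing qexp by the shift applied.  Roughly
   (illustration only):

     while (qi && !(qi & 0x8000)) { qi <<= 1; qexp--; }
*/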
static inline void lsp_norm_asm(ogg_uint32_t *qip,ogg_int32_t *qexpp){

  ogg_uint32_t qi=*qip;
  ogg_int32_t qexp=*qexpp;

  asm("tst     %0,#0x0000ff00;"
      "moveq   %0,%0,lsl #8;"
      "subeq   %1,%1,#8;"
      "tst     %0,#0x0000f000;"
      "moveq   %0,%0,lsl #4;"
      "subeq   %1,%1,#4;"
      "tst     %0,#0x0000c000;"
      "moveq   %0,%0,lsl #2;"
      "subeq   %1,%1,#2;"
      "tst     %0,#0x00008000;"
      "moveq   %0,%0,lsl #1;"
      "subeq   %1,%1,#1;"
      : "+r"(qi),"+r"(qexp)
      :
      : "cc");
  *qip=qi;
  *qexpp=qexp;
}

#endif
#endif