/* xref: /aosp_15_r20/external/libdav1d/src/arm/32/msac.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd) */
/*
 * Copyright © 2019, VideoLAN and dav1d authors
 * Copyright © 2020, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// Byte offsets of the fields that the routines in this file access through
// the context pointer passed in r0.
// NOTE(review): these presumably mirror the layout of the C MsacContext
// struct on a 32-bit target - confirm they stay in sync with its definition.

// Pointer to the next input byte to be consumed by a refill.
#define BUF_POS 0
// Pointer one past the last input byte (compared against BUF_POS + 4).
#define BUF_END 4
// 32-bit decoder window; its upper halfword (DIF + 2) is compared against v.
#define DIF 8
// Current range; read as a halfword, written back as a full word.
#define RNG 12
// Bit counter; a refill is triggered when subtracting the shift borrows.
#define CNT 16
// Non-zero enables the CDF adaptation step, zero skips it.
#define ALLOW_UPDATE_CDF 20

// Minimum-probability offsets added to every candidate v value (the
// "EC_MIN_PROB * (n_symbols - ret)" term in the comments further down).
//
// Row 0 holds 4 * (15 - i) for i = 0..15, row 1 is all zeros. The decoders
// request the table address with an extra 30 - 2 * n_symbols
// (NOTE(review): assumes the third movrel_local argument is a byte offset -
// confirm in asm.S), i.e. they start reading at halfword 15 - n_symbols.
// Lane i then receives 4 * (n_symbols - i) for i <= n_symbols and 0 for all
// higher lanes. The zero row also keeps a 16-halfword load that starts
// anywhere in row 0 inside the table.
const coeffs
        .short 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0
        .short 0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0
endconst

// One distinct bit per 16-bit lane: lane i holds 1 << i. ANDing a per-lane
// all-ones/all-zeros compare result with this table and summing the lanes
// yields a scalar bitmask with bit i set when lane i compared true.
// The table is loaded with a :128 (or :64) alignment hint, so it must be
// 16-byte aligned.
// NOTE(review): align=4 is presumably a power-of-two exponent (2^4 = 16
// bytes) - confirm against the const macro in asm.S.
const bits, align=4
        .short   0x1,   0x2,   0x4,   0x8,   0x10,   0x20,   0x40,   0x80
        .short 0x100, 0x200, 0x400, 0x800, 0x1000, 0x2000, 0x4000, 0x8000
endconst

// vld1_align_n d0, q0, q1, src, n
//
// Load 16-bit elements from the address in \src, using an alignment hint.
// The amount loaded is selected at assembly time by \n:
//   n == 4   loads {\d0}       with a :64 hint
//   n == 8   loads {\q0}       with a :128 hint
//   else     loads {\q0, \q1}  with a :128 hint
// The caller chooses what to pass as \d0; in this file it is sometimes a
// q register, in which case the n == 4 form loads a full 16 bytes.
// \src is not written back. Only the register operands of the selected
// branch are used.
.macro vld1_align_n d0, q0, q1, src, n
.if \n == 4
        vld1.16         {\d0},  [\src, :64]
.elseif \n == 8
        vld1.16         {\q0},  [\src, :128]
.else
        vld1.16         {\q0, \q1},  [\src, :128]
.endif
.endm

// vld1_n d0, q0, q1, src, n
//
// Same register selection as vld1_align_n, but without an alignment hint,
// so \src may be any address:
//   n == 4   loads {\d0}
//   n == 8   loads {\q0}
//   else     loads {\q0, \q1}
// \src is not written back.
.macro vld1_n d0, q0, q1, src, n
.if \n == 4
        vld1.16         {\d0},  [\src]
.elseif \n == 8
        vld1.16         {\q0},  [\src]
.else
        vld1.16         {\q0, \q1},  [\src]
.endif
.endm

// vst1_align_n d0, q0, q1, src, n
//
// Store 16-bit elements to the address in \src (despite its name, \src is
// the destination pointer here), using an alignment hint:
//   n == 4   stores {\d0}       with a :64 hint
//   n == 8   stores {\q0}       with a :128 hint
//   else     stores {\q0, \q1}  with a :128 hint
// \src is not written back.
.macro vst1_align_n d0, q0, q1, src, n
.if \n == 4
        vst1.16         {\d0},  [\src, :64]
.elseif \n == 8
        vst1.16         {\q0},  [\src, :128]
.else
        vst1.16         {\q0, \q1},  [\src, :128]
.endif
.endm

// vst1_n d0, q0, q1, src, n
//
// Store 16-bit elements to the address in \src (the destination pointer)
// without an alignment hint:
//   n == 4   stores {\d0}
//   n == 8   stores {\q0}
//   else     stores {\q0, \q1}
// The caller chooses what to pass as \d0; decode_update passes a q register
// so that the n == 4 case still writes 8 halfwords.
// \src is not written back.
.macro vst1_n d0, q0, q1, src, n
.if \n == 4
        vst1.16         {\d0},  [\src]
.elseif \n == 8
        vst1.16         {\q0},  [\src]
.else
        vst1.16         {\q0, \q1},  [\src]
.endif
.endm

// vshr_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic unsigned right shift of 16-bit lanes (vshr.u16):
//   n == 4   \d0 = \s0 >> \s3
//   n == 8   \d1 = \s1 >> \s4
//   n == 16  \d1 = \s1 >> \s4  and  \d2 = \s2 >> \s5
// VSHR only has an immediate shift form, so \s3/\s4/\s5 must be immediate
// operands (e.g. #4), not registers.
// This macro is not invoked in the part of the file visible here.
.macro vshr_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vshr.u16        \d0,  \s0,  \s3
.else
        vshr.u16        \d1,  \s1,  \s4
.if \n == 16
        vshr.u16        \d2,  \s2,  \s5
.endif
.endif
.endm

// vadd_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic lane-wise 16-bit addition (vadd.i16). The operand triple
// that is actually used depends on \n:
//   n == 4   \d0 = \s0 + \s3
//   n == 8   \d1 = \s1 + \s4
//   n == 16  \d1 = \s1 + \s4  and  \d2 = \s2 + \s5
// Arguments belonging to branches that are not selected are ignored.
.macro vadd_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vadd.i16        \d0,  \s0,  \s3
.else
        vadd.i16        \d1,  \s1,  \s4
.if \n == 16
        vadd.i16        \d2,  \s2,  \s5
.endif
.endif
.endm

// vsub_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic lane-wise 16-bit subtraction (vsub.i16):
//   n == 4   \d0 = \s0 - \s3
//   n == 8   \d1 = \s1 - \s4
//   n == 16  \d1 = \s1 - \s4  and  \d2 = \s2 - \s5
// Arguments belonging to branches that are not selected are ignored.
.macro vsub_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vsub.i16        \d0,  \s0,  \s3
.else
        vsub.i16        \d1,  \s1,  \s4
.if \n == 16
        vsub.i16        \d2,  \s2,  \s5
.endif
.endif
.endm

// vand_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic bitwise AND (vand):
//   n == 4   \d0 = \s0 & \s3
//   n == 8   \d1 = \s1 & \s4
//   n == 16  \d1 = \s1 & \s4  and  \d2 = \s2 & \s5
// Arguments belonging to branches that are not selected are ignored.
.macro vand_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vand            \d0,  \s0,  \s3
.else
        vand            \d1,  \s1,  \s4
.if \n == 16
        vand            \d2,  \s2,  \s5
.endif
.endif
.endm

// vcge_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic unsigned 16-bit "greater than or equal" compare (vcge.u16).
// Each destination lane becomes all ones when the first source lane is >=
// the second source lane, and zero otherwise:
//   n == 4   \d0 = (\s0 >= \s3)
//   n == 8   \d1 = (\s1 >= \s4)
//   n == 16  \d1 = (\s1 >= \s4)  and  \d2 = (\s2 >= \s5)
// Arguments belonging to branches that are not selected are ignored.
.macro vcge_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vcge.u16        \d0,  \s0,  \s3
.else
        vcge.u16        \d1,  \s1,  \s4
.if \n == 16
        vcge.u16        \d2,  \s2,  \s5
.endif
.endif
.endm

// vrhadd_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic unsigned rounding halving add on 16-bit lanes
// (vrhadd.u16), i.e. (a + b + 1) >> 1 computed without overflow:
//   n == 4   \d0 = rhadd(\s0, \s3)
//   n == 8   \d1 = rhadd(\s1, \s4)
//   n == 16  \d1 = rhadd(\s1, \s4)  and  \d2 = rhadd(\s2, \s5)
// Arguments belonging to branches that are not selected are ignored.
.macro vrhadd_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vrhadd.u16      \d0,  \s0,  \s3
.else
        vrhadd.u16      \d1,  \s1,  \s4
.if \n == 16
        vrhadd.u16      \d2,  \s2,  \s5
.endif
.endif
.endm

// vshl_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic signed per-lane variable shift (vshl.s16 by register).
// The shift count comes from the matching lane of the second source; a
// negative count shifts right (arithmetically), which is how the callers in
// this file implement ">> rate" by passing -rate:
//   n == 4   \d0 = \s0 << \s3
//   n == 8   \d1 = \s1 << \s4
//   n == 16  \d1 = \s1 << \s4  and  \d2 = \s2 << \s5
// Arguments belonging to branches that are not selected are ignored.
.macro vshl_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vshl.s16        \d0,  \s0,  \s3
.else
        vshl.s16        \d1,  \s1,  \s4
.if \n == 16
        vshl.s16        \d2,  \s2,  \s5
.endif
.endif
.endm

// vqdmulh_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
//
// Width-generic signed saturating doubling multiply returning the high half
// (vqdmulh.s16), i.e. (2 * a * b) >> 16 per 16-bit lane with saturation:
//   n == 4   \d0 = qdmulh(\s0, \s3)
//   n == 8   \d1 = qdmulh(\s1, \s4)
//   n == 16  \d1 = qdmulh(\s1, \s4)  and  \d2 = qdmulh(\s2, \s5)
// Arguments belonging to branches that are not selected are ignored.
.macro vqdmulh_n d0, d1, d2, s0, s1, s2, s3, s4, s5, n
.if \n == 4
        vqdmulh.s16     \d0,  \s0,  \s3
.else
        vqdmulh.s16     \d1,  \s1,  \s4
.if \n == 16
        vqdmulh.s16     \d2,  \s2,  \s5
.endif
.endif
.endm

// unsigned dav1d_msac_decode_symbol_adapt4_neon(MsacContext *s, uint16_t *cdf,
//                                               size_t n_symbols);
//
// Decodes one symbol from the range coder state in *s using the adaptive
// CDF in cdf[], optionally adapts the CDF, renormalizes the coder state and
// returns the decoded symbol index.
//
// In:   r0 = s, r1 = cdf (loaded/stored with a :64 hint here), r2 = n_symbols
// Out:  r0 = decoded symbol index
// Regs: r4-r10 and lr are saved and restored; r3, r12, flags and the NEON
//       registers q0-q3 and q8-q14 are used as scratch.
// Stack scratch (48 bytes below the saved registers):
//       [sp, #14]      u = original s->rng (halfword)
//       [sp, #16 ...]  v[0..] candidate values, so that v[ret - 1] for
//                      ret == 0 reads u
//
// The body of this function after L(renorm) is shared: the adapt8/adapt16
// entry points expand the same decode_update macro and branch to L(renorm).

function msac_decode_symbol_adapt4_neon, export=1
// decode_update n
//
// Expands to the function prologue, the symbol search and the optional CDF
// adaptation for a CDF processed as \n 16-bit lanes (4, 8 or 16).
// On exit from the macro (both by falling through and via the early
// "beq L(renorm)"): lr = decoded symbol index, r0 = s, and the stack
// scratch area described above is filled in.
.macro decode_update n
        push            {r4-r10,lr}
        sub             sp,  sp,  #48
        add             r8,  r0,  #RNG

        vld1_align_n    d0,  q0,  q1,  r1,  \n                         // cdf
        vld1.16         {d16[]}, [r8, :16]                             // rng
        movrel_local    r9,  coeffs, 30
        vmov.i16        d30, #0x7f00                                   // 0x7f00
        // Step back n_symbols halfwords so that lane i reads the
        // 4 * (n_symbols - i) entry of the coeffs table.
        sub             r9,  r9,  r2, lsl #1
        vmvn.i16        q14, #0x3f                                     // 0xffc0
        add             r8,  sp,  #14
        vand            d22, d16, d30                                  // rng & 0x7f00
        vst1.16         {d16[0]}, [r8, :16]                            // store original u = s->rng
        vand_n          d4,  q2,  q3,  d0,  q0,  q1, d28, q14, q14, \n // cdf & 0xffc0
.if \n > 4
        // The wider variants multiply by q11, so replicate the masked rng
        // into its upper half as well.
        vmov            d23, d22
.endif

        vld1_n          d16, q8,  q9,  r9,  \n                          // EC_MIN_PROB * (n_symbols - ret)
        vqdmulh_n       d20, q10, q11, d4,  q2,  q3,  d22, q11, q11, \n // ((cdf >> EC_PROB_SHIFT) * (r - 128)) >> 1
        add             r8,  r0,  #DIF + 2

        vadd_n          d16, q8,  q9,  d4,  q2,  q3,  d16, q8,  q9,  \n // v = cdf + EC_MIN_PROB * (n_symbols - ret)
.if \n == 4
        // Only d16 holds real v values for n == 4; zero the upper half of
        // q8 since the whole q register is stored and compared below.
        vmov.i16        d17, #0
.endif
        vadd_n          d16, q8,  q9,  d20, q10, q11, d16, q8,  q9,  \n // v = ((cdf >> EC_PROB_SHIFT) * r) >> 1 + EC_MIN_PROB * (n_symbols - ret)

        add             r9,  sp,  #16
        vld1.16         {d20[]}, [r8, :16]                              // dif >> (EC_WIN_SIZE - 16)
        movrel_local    r8,  bits
        // q8 is passed as the n == 4 operand too, so 8 halfwords are always
        // written to the scratch area.
        vst1_n          q8,  q8,  q9,  r9,  \n                          // store v values to allow indexed access

        vmov            d21, d20
        vld1_align_n    q12, q12, q13, r8,  \n
.if \n == 16
        vmov            q11, q10
.endif

        vcge_n          q2,  q2,  q3,  q10, q10, q11, q8,  q8,  q9,  \n // c >= v

        vand_n          q10, q10, q11, q2,  q2,  q3,  q12, q12, q13, \n // One bit per halfword set in the mask
.if \n == 16
        vadd.i16        q10, q10, q11
.endif
        // Horizontal sum of the per-lane bits; interleaved with scalar work.
        vadd.i16        d20, d20, d21                                   // Aggregate mask bits
        ldr             r4,  [r0, #ALLOW_UPDATE_CDF]
        vpadd.i16       d20, d20, d20
        lsl             r10, r2,  #1                                    // byte offset of cdf[n_symbols]
        vpadd.i16       d20, d20, d20
        vmov.u16        r3,  d20[0]
        cmp             r4,  #0
        // rbit + clz = count trailing zeros: index of the lowest set mask bit.
        rbit            r3,  r3
        clz             lr,  r3                                         // ret

        // Flags still come from "cmp r4, #0": skip adaptation when
        // allow_update_cdf is zero.
        beq             L(renorm)
        // update_cdf
        ldrh            r3,  [r1, r10]                                  // count = cdf[n_symbols]
        vmov.i8         q10, #0xff
.if \n == 16
        mov             r4,  #-5
.else
        mvn             r12, r2
        mov             r4,  #-4
        cmn             r12, #3                                         // set C if n_symbols <= 2
.endif
        vrhadd_n        d16, q8,  q9,  d20, q10, q10, d4,  q2,  q3,  \n // i >= val ? -1 : 32768
.if \n == 16
        sub             r4,  r4,  r3, lsr #4                            // -((count >> 4) + 5)
.else
        lsr             r12, r3,  #4                                    // count >> 4
        // sbc subtracts an extra 1 when C is clear, i.e. when n_symbols > 2.
        sbc             r4,  r4,  r12                                   // -((count >> 4) + (n_symbols > 2) + 4)
.endif
        vsub_n          d16, q8,  q9,  d16, q8,  q9,  d0,  q0,  q1,  \n // (32768 - cdf[i]) or (-1 - cdf[i])
.if \n == 4
        vdup.16         d20, r4                                         // -rate
.else
        vdup.16         q10, r4                                         // -rate
.endif

        sub             r3,  r3,  r3, lsr #5                            // count - (count == 32)
        vsub_n          d0,  q0,  q1,  d0,  q0,  q1,  d4,  q2,  q3,  \n // cdf + (i >= val ? 1 : 0)
        vshl_n          d16, q8,  q9,  d16, q8,  q9,  d20, q10, q10, \n // ({32768,-1} - cdf[i]) >> rate
        add             r3,  r3,  #1                                    // count + (count < 32)
        vadd_n          d0,  q0,  q1,  d0,  q0,  q1,  d16, q8,  q9,  \n // cdf + (32768 - cdf[i]) >> rate
        vst1_align_n    d0,  q0,  q1,  r1,  \n
        // Written after the vector store so the counter slot ends up with
        // the updated count.
        strh            r3,  [r1, r10]
.endm

        decode_update   4

// Shared renormalization tail.
// In: r0 = s, lr = ret, stack scratch holding u at [sp, #14] and v[] from
//     [sp, #16]. Returns to the original caller with r0 = ret.
L(renorm):
        add             r8,  sp,  #16
        add             r8,  r8,  lr, lsl #1
        ldrh            r3,  [r8]              // v
        ldrh            r4,  [r8, #-2]         // u
        ldr             r6,  [r0, #CNT]
        ldr             r7,  [r0, #DIF]
        sub             r4,  r4,  r3           // rng = u - v
        clz             r5,  r4                // clz(rng)
        eor             r5,  r5,  #16          // d = clz(rng) ^ 16
        sub             r7,  r7,  r3, lsl #16  // dif - (v << 16)
// Secondary entry: r4 = new rng, r5 = d, r6 = cnt, r7 = dif already set up.
// NOTE(review): not referenced in the part of the file visible here;
// presumably used by decoders defined further down - confirm.
L(renorm2):
        lsl             r4,  r4,  r5           // rng << d
        subs            r6,  r6,  r5           // cnt -= d
        lsl             r7,  r7,  r5           // (dif - (v << 16)) << d
        str             r4,  [r0, #RNG]
        // lsl/str leave the flags from subs intact: no borrow means cnt did
        // not go negative and no refill is needed.
        bhs             4f

        // refill
        ldr             r3,  [r0, #BUF_POS]    // BUF_POS
        ldr             r4,  [r0, #BUF_END]    // BUF_END
        add             r5,  r3,  #4
        subs            r5,  r5,  r4           // r5 = buf_pos + 4 - buf_end
        bhi             6f                     // fewer than 4 bytes left

        ldr             r8,  [r3]              // next_bits
        rsb             r5,  r6,  #16
        add             r4,  r6,  #16          // shift_bits = cnt + 16
        mvn             r8,  r8
        lsr             r5,  r5,  #3           // num_bytes_read
        rev             r8,  r8                // next_bits = bswap(next_bits)
        lsr             r8,  r8,  r4           // next_bits >>= shift_bits

2:      // refill_end
        add             r3,  r3,  r5
        add             r6,  r6,  r5, lsl #3   // cnt += num_bits_read
        str             r3,  [r0, #BUF_POS]

3:      // refill_end2
        orr             r7,  r7,  r8           // dif |= next_bits

4:      // end
        str             r6,  [r0, #CNT]
        str             r7,  [r0, #DIF]
        mov             r0,  lr                // return ret
        add             sp,  sp,  #48
        pop             {r4-r10,pc}

5:      // pad_with_ones
        // Reached when buf_pos >= buf_end: build the bits to merge into dif
        // from cnt alone, without reading memory. Jumping to 3b skips the
        // BUF_POS/cnt update at 2:.
        add             r8,  r6,  #-240
        lsr             r8,  r8,  r8           // register shift uses the low byte of r8
        b               3b

6:      // refill_eob
        cmp             r3,  r4
        bhs             5b

        // 1-3 bytes remain. Load the last 4 bytes of the buffer (this reads
        // up to 3 bytes before buf_pos) and shift out the ones that precede
        // buf_pos; r5 still holds buf_pos + 4 - buf_end.
        ldr             r8,  [r4, #-4]
        lsl             r5,  r5,  #3
        lsr             r8,  r8,  r5
        add             r5,  r6,  #16
        mvn             r8,  r8
        sub             r4,  r4,  r3           // num_bytes_left
        rev             r8,  r8
        lsr             r8,  r8,  r5
        rsb             r5,  r6,  #16
        lsr             r5,  r5,  #3
        // num_bytes_read = min((16 - cnt) >> 3, num_bytes_left)
        cmp             r5,  r4
        it              hs
        movhs           r5,  r4
        b               2b
endfunc

// Variant of msac_decode_symbol_adapt4_neon that processes the CDF as
// 8 16-bit lanes (one q register, loaded/stored with a :128 hint).
// Same arguments and return value: r0 = s, r1 = cdf, r2 = n_symbols;
// returns the decoded symbol index in r0.
// The prologue, symbol search and CDF adaptation are expanded here; the
// renormalization and the function epilogue are shared via L(renorm) in
// msac_decode_symbol_adapt4_neon.
function msac_decode_symbol_adapt8_neon, export=1
        decode_update   8
        b               L(renorm)
endfunc

// Variant of msac_decode_symbol_adapt4_neon that processes the CDF as
// 16 16-bit lanes (two q registers, loaded/stored with a :128 hint).
// Same arguments and return value: r0 = s, r1 = cdf, r2 = n_symbols;
// returns the decoded symbol index in r0.
// Unlike the 4/8-lane variants, the adaptation rate here always uses the
// constant 5 rather than 4 + (n_symbols > 2).
// The renormalization and the function epilogue are shared via L(renorm)
// in msac_decode_symbol_adapt4_neon.
function msac_decode_symbol_adapt16_neon, export=1
        decode_update   16
        b               L(renorm)
endfunc

355*c0909341SAndroid Build Coastguard Workerfunction msac_decode_hi_tok_neon, export=1
356*c0909341SAndroid Build Coastguard Worker        push            {r4-r10,lr}
357*c0909341SAndroid Build Coastguard Worker        vld1.16         {d0},  [r1, :64]       // cdf
358*c0909341SAndroid Build Coastguard Worker        add             r4,  r0,  #RNG
359*c0909341SAndroid Build Coastguard Worker        vmov.i16        d31, #0x7f00           // 0x7f00
360*c0909341SAndroid Build Coastguard Worker        movrel_local    r5,  coeffs, 30-2*3
361*c0909341SAndroid Build Coastguard Worker        vmvn.i16        d30, #0x3f             // 0xffc0
362*c0909341SAndroid Build Coastguard Worker        ldrh            r9,  [r1, #6]          // count = cdf[n_symbols]
363*c0909341SAndroid Build Coastguard Worker        vld1.16         {d1[]},  [r4, :16]     // rng
364*c0909341SAndroid Build Coastguard Worker        movrel_local    r4,  bits
365*c0909341SAndroid Build Coastguard Worker        vld1.16         {d29}, [r5]            // EC_MIN_PROB * (n_symbols - ret)
366*c0909341SAndroid Build Coastguard Worker        add             r5,  r0,  #DIF + 2
367*c0909341SAndroid Build Coastguard Worker        vld1.16         {q8}, [r4, :128]
368*c0909341SAndroid Build Coastguard Worker        mov             r2,  #-24
369*c0909341SAndroid Build Coastguard Worker        vand            d20, d0, d30           // cdf & 0xffc0
370*c0909341SAndroid Build Coastguard Worker        ldr             r10, [r0, #ALLOW_UPDATE_CDF]
371*c0909341SAndroid Build Coastguard Worker        vld1.16         {d2[]}, [r5, :16]      // dif >> (EC_WIN_SIZE - 16)
372*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #48
373*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [r0, #CNT]
374*c0909341SAndroid Build Coastguard Worker        ldr             r7,  [r0, #DIF]
375*c0909341SAndroid Build Coastguard Worker        vmov            d3,  d2
376*c0909341SAndroid Build Coastguard Worker1:
377*c0909341SAndroid Build Coastguard Worker        vand            d23, d1,  d31          // rng & 0x7f00
378*c0909341SAndroid Build Coastguard Worker        vqdmulh.s16     d18, d20, d23          // ((cdf >> EC_PROB_SHIFT) * (r - 128)) >> 1
379*c0909341SAndroid Build Coastguard Worker        add             r12, sp,  #14
380*c0909341SAndroid Build Coastguard Worker        vadd.i16        d6,  d20, d29          // v = cdf + EC_MIN_PROB * (n_symbols - ret)
381*c0909341SAndroid Build Coastguard Worker        vadd.i16        d6,  d18, d6           // v = ((cdf >> EC_PROB_SHIFT) * r) >> 1 + EC_MIN_PROB * (n_symbols - ret)
382*c0909341SAndroid Build Coastguard Worker        vmov.i16        d7,  #0
383*c0909341SAndroid Build Coastguard Worker        vst1.16         {d1[0]}, [r12, :16]    // store original u = s->rng
384*c0909341SAndroid Build Coastguard Worker        add             r12, sp,  #16
385*c0909341SAndroid Build Coastguard Worker        vcge.u16        q2,  q1,  q3           // c >= v
386*c0909341SAndroid Build Coastguard Worker        vst1.16         {q3},  [r12]           // store v values to allow indexed access
387*c0909341SAndroid Build Coastguard Worker        vand            q9,  q2,  q8           // One bit per halfword set in the mask
388*c0909341SAndroid Build Coastguard Worker
389*c0909341SAndroid Build Coastguard Worker        vadd.i16        d18, d18, d19          // Aggregate mask bits
390*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d18, d18, d18
391*c0909341SAndroid Build Coastguard Worker        vpadd.i16       d18, d18, d18
392*c0909341SAndroid Build Coastguard Worker        vmov.u16        r3,  d18[0]
393*c0909341SAndroid Build Coastguard Worker        cmp             r10, #0
394*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #5
395*c0909341SAndroid Build Coastguard Worker        rbit            r3,  r3
396*c0909341SAndroid Build Coastguard Worker        add             r8,  sp,  #16
397*c0909341SAndroid Build Coastguard Worker        clz             lr,  r3                // ret
398*c0909341SAndroid Build Coastguard Worker
399*c0909341SAndroid Build Coastguard Worker        beq             2f
400*c0909341SAndroid Build Coastguard Worker        // update_cdf
401*c0909341SAndroid Build Coastguard Worker        vmov.i8         d22, #0xff
402*c0909341SAndroid Build Coastguard Worker        mov             r4,  #-5
403*c0909341SAndroid Build Coastguard Worker        vrhadd.u16      d6,  d22, d4           // i >= val ? -1 : 32768
404*c0909341SAndroid Build Coastguard Worker        sub             r4,  r4,  r9, lsr #4   // -((count >> 4) + 5)
405*c0909341SAndroid Build Coastguard Worker        vsub.i16        d6,  d6,  d0           // (32768 - cdf[i]) or (-1 - cdf[i])
406*c0909341SAndroid Build Coastguard Worker        vdup.16         d18, r4                // -rate
407*c0909341SAndroid Build Coastguard Worker
408*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r9, lsr #5   // count - (count == 32)
409*c0909341SAndroid Build Coastguard Worker        vsub.i16        d0,  d0,  d4           // cdf + (i >= val ? 1 : 0)
410*c0909341SAndroid Build Coastguard Worker        vshl.s16        d6,  d6,  d18          // ({32768,-1} - cdf[i]) >> rate
411*c0909341SAndroid Build Coastguard Worker        add             r9,  r9,  #1           // count + (count < 32)
412*c0909341SAndroid Build Coastguard Worker        vadd.i16        d0,  d0,  d6           // cdf + (32768 - cdf[i]) >> rate
413*c0909341SAndroid Build Coastguard Worker        vst1.16         {d0},  [r1, :64]
414*c0909341SAndroid Build Coastguard Worker        vand            d20, d0,  d30          // cdf & 0xffc0
415*c0909341SAndroid Build Coastguard Worker        strh            r9,  [r1, #6]
416*c0909341SAndroid Build Coastguard Worker
417*c0909341SAndroid Build Coastguard Worker2:
418*c0909341SAndroid Build Coastguard Worker        add             r8,  r8,  lr, lsl #1
419*c0909341SAndroid Build Coastguard Worker        ldrh            r3,  [r8]              // v
420*c0909341SAndroid Build Coastguard Worker        ldrh            r4,  [r8, #-2]         // u
421*c0909341SAndroid Build Coastguard Worker        sub             r4,  r4,  r3           // rng = u - v
422*c0909341SAndroid Build Coastguard Worker        clz             r5,  r4                // clz(rng)
423*c0909341SAndroid Build Coastguard Worker        eor             r5,  r5,  #16          // d = clz(rng) ^ 16
424*c0909341SAndroid Build Coastguard Worker        sub             r7,  r7,  r3, lsl #16  // dif - (v << 16)
425*c0909341SAndroid Build Coastguard Worker        lsl             r4,  r4,  r5           // rng << d
426*c0909341SAndroid Build Coastguard Worker        subs            r6,  r6,  r5           // cnt -= d
427*c0909341SAndroid Build Coastguard Worker        lsl             r7,  r7,  r5           // (dif - (v << 16)) << d
428*c0909341SAndroid Build Coastguard Worker        str             r4,  [r0, #RNG]
429*c0909341SAndroid Build Coastguard Worker        vdup.16         d1,  r4
430*c0909341SAndroid Build Coastguard Worker        bhs             5f
431*c0909341SAndroid Build Coastguard Worker
432*c0909341SAndroid Build Coastguard Worker        // refill
433*c0909341SAndroid Build Coastguard Worker        ldr             r3,  [r0, #BUF_POS]    // BUF_POS
434*c0909341SAndroid Build Coastguard Worker        ldr             r4,  [r0, #BUF_END]    // BUF_END
435*c0909341SAndroid Build Coastguard Worker        add             r5,  r3,  #4
436*c0909341SAndroid Build Coastguard Worker        subs            r5,  r5,  r4
437*c0909341SAndroid Build Coastguard Worker        bhi             7f
438*c0909341SAndroid Build Coastguard Worker
439*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r3]              // next_bits
440*c0909341SAndroid Build Coastguard Worker        rsb             r5,  r6,  #16
441*c0909341SAndroid Build Coastguard Worker        add             r4,  r6,  #16          // shift_bits = cnt + 16
442*c0909341SAndroid Build Coastguard Worker        mvn             r8,  r8
443*c0909341SAndroid Build Coastguard Worker        lsr             r5,  r5,  #3           // num_bytes_read
444*c0909341SAndroid Build Coastguard Worker        rev             r8,  r8                // next_bits = bswap(next_bits)
445*c0909341SAndroid Build Coastguard Worker        lsr             r8,  r8,  r4           // next_bits >>= shift_bits
446*c0909341SAndroid Build Coastguard Worker
447*c0909341SAndroid Build Coastguard Worker3:      // refill_end
448*c0909341SAndroid Build Coastguard Worker        add             r3,  r3,  r5
449*c0909341SAndroid Build Coastguard Worker        add             r6,  r6,  r5, lsl #3   // cnt += num_bits_read
450*c0909341SAndroid Build Coastguard Worker        str             r3,  [r0, #BUF_POS]
451*c0909341SAndroid Build Coastguard Worker
452*c0909341SAndroid Build Coastguard Worker4:      // refill_end2
453*c0909341SAndroid Build Coastguard Worker        orr             r7,  r7,  r8           // dif |= next_bits
454*c0909341SAndroid Build Coastguard Worker
455*c0909341SAndroid Build Coastguard Worker5:      // end
456*c0909341SAndroid Build Coastguard Worker        lsl             lr,  lr,  #1
457*c0909341SAndroid Build Coastguard Worker        sub             lr,  lr,  #5
458*c0909341SAndroid Build Coastguard Worker        lsr             r12, r7,  #16
459*c0909341SAndroid Build Coastguard Worker        adds            r2,  r2,  lr           // carry = tok_br < 3 || tok == 15
460*c0909341SAndroid Build Coastguard Worker        vdup.16         q1,  r12
461*c0909341SAndroid Build Coastguard Worker        bcc             1b                     // loop if !carry
462*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #30
463*c0909341SAndroid Build Coastguard Worker        str             r6,  [r0, #CNT]
464*c0909341SAndroid Build Coastguard Worker        add             sp,  sp,  #48
465*c0909341SAndroid Build Coastguard Worker        str             r7,  [r0, #DIF]
466*c0909341SAndroid Build Coastguard Worker        lsr             r0,  r2,  #1
467*c0909341SAndroid Build Coastguard Worker        pop             {r4-r10,pc}
468*c0909341SAndroid Build Coastguard Worker
469*c0909341SAndroid Build Coastguard Worker6:      // pad_with_ones
470*c0909341SAndroid Build Coastguard Worker        add             r8,  r6,  #-240
471*c0909341SAndroid Build Coastguard Worker        lsr             r8,  r8,  r8
472*c0909341SAndroid Build Coastguard Worker        b               4b
473*c0909341SAndroid Build Coastguard Worker
474*c0909341SAndroid Build Coastguard Worker7:      // refill_eob
475*c0909341SAndroid Build Coastguard Worker        cmp             r3,  r4
476*c0909341SAndroid Build Coastguard Worker        bhs             6b
477*c0909341SAndroid Build Coastguard Worker
478*c0909341SAndroid Build Coastguard Worker        ldr             r8,  [r4, #-4]
479*c0909341SAndroid Build Coastguard Worker        lsl             r5,  r5,  #3
480*c0909341SAndroid Build Coastguard Worker        lsr             r8,  r8,  r5
481*c0909341SAndroid Build Coastguard Worker        add             r5,  r6,  #16
482*c0909341SAndroid Build Coastguard Worker        mvn             r8,  r8
483*c0909341SAndroid Build Coastguard Worker        sub             r4,  r4,  r3           // num_bytes_left
484*c0909341SAndroid Build Coastguard Worker        rev             r8,  r8
485*c0909341SAndroid Build Coastguard Worker        lsr             r8,  r8,  r5
486*c0909341SAndroid Build Coastguard Worker        rsb             r5,  r6,  #16
487*c0909341SAndroid Build Coastguard Worker        lsr             r5,  r5,  #3
488*c0909341SAndroid Build Coastguard Worker        cmp             r5,  r4
489*c0909341SAndroid Build Coastguard Worker        it              hs
490*c0909341SAndroid Build Coastguard Worker        movhs           r5,  r4
491*c0909341SAndroid Build Coastguard Worker        b               3b
492*c0909341SAndroid Build Coastguard Workerendfunc
493*c0909341SAndroid Build Coastguard Worker
// msac_decode_bool_equi_neon
// Decodes one boolean from the range coder without a probability argument:
// the split point is derived from rng alone, v = ((rng & ~0xff) + 8) >> 1.
//   In:    r0 = decoder context; RNG, CNT and DIF are loaded from it
//          (those offsets are defined outside this chunk).
//   Bit:   1 when dif < (v << 16), otherwise 0.
//   Exit:  tail-branches to L(renorm2), which is defined outside this chunk;
//          presumably it renormalises, writes the state back, releases the
//          frame and returns the bit -- confirm at that label.
//   State at the branch: r4 = new rng before normalisation, r5 = d,
//          r6 = cnt, r7 = dif, lr = r2 = bit, sp = 48 bytes below the
//          eight pushed registers.
494*c0909341SAndroid Build Coastguard Workerfunction msac_decode_bool_equi_neon, export=1
495*c0909341SAndroid Build Coastguard Worker        push            {r4-r10,lr}            // save r4-r10 and the return address
496*c0909341SAndroid Build Coastguard Worker        ldr             r5,  [r0, #RNG]        // r = rng
497*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [r0, #CNT]        // cnt
498*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #48          // 48 bytes reserved but never touched here; presumably mirrors the frame the shared exit path releases -- confirm at L(renorm2)
499*c0909341SAndroid Build Coastguard Worker        ldr             r7,  [r0, #DIF]        // dif
500*c0909341SAndroid Build Coastguard Worker        bic             r4,  r5,  #0xff        // r4 = r & ~0xff
501*c0909341SAndroid Build Coastguard Worker        add             r4,  r4,  #8           // r4 = 2*v (always even)
502*c0909341SAndroid Build Coastguard Worker        mov             r2,  #0                // bit = 0 unless the lo case below fires
503*c0909341SAndroid Build Coastguard Worker        subs            r8,  r7,  r4, lsl #15  // dif - vw, vw = (2*v) << 15 = v << 16; sets the flags used by the IT block
504*c0909341SAndroid Build Coastguard Worker        lsr             r4,  r4,  #1           // v (no flag update)
505*c0909341SAndroid Build Coastguard Worker        sub             r5,  r5,  r4           // r - v (no flag update)
506*c0909341SAndroid Build Coastguard Worker        itee            lo                     // lo: dif < vw, hs: dif >= vw
507*c0909341SAndroid Build Coastguard Worker        movlo           r2,  #1                // dif < vw: bit = 1, v and dif stay as they are
508*c0909341SAndroid Build Coastguard Worker        movhs           r4,  r5                // dif >= vw: v = r - v (bit stays 0)
509*c0909341SAndroid Build Coastguard Worker        movhs           r7,  r8                // dif >= vw: dif = dif - vw
510*c0909341SAndroid Build Coastguard Worker
511*c0909341SAndroid Build Coastguard Worker        clz             r5,  r4                // clz(rng)
512*c0909341SAndroid Build Coastguard Worker        eor             r5,  r5,  #16          // d = clz(rng) ^ 16
513*c0909341SAndroid Build Coastguard Worker        mov             lr,  r2                // hand the bit over in lr
514*c0909341SAndroid Build Coastguard Worker        b               L(renorm2)             // shared tail, defined outside this chunk
515*c0909341SAndroid Build Coastguard Workerendfunc
516*c0909341SAndroid Build Coastguard Worker
// msac_decode_bool_neon
// Decodes one boolean from the range coder using a caller-supplied
// probability: v = (((rng >> 8) * (f & ~63)) >> 7) + 4.
//   In:    r0 = decoder context; RNG, CNT and DIF are loaded from it
//          (those offsets are defined outside this chunk).
//          r1 = f, the probability value; its low 6 bits are cleared first.
//   Bit:   1 when dif < (v << 16), otherwise 0.
//   Exit:  tail-branches to L(renorm2), which is defined outside this chunk;
//          presumably it renormalises, writes the state back, releases the
//          frame and returns the bit -- confirm at that label.
//   State at the branch: r4 = new rng before normalisation, r5 = d,
//          r6 = cnt, r7 = dif, lr = r2 = bit, sp = 48 bytes below the
//          eight pushed registers.
517*c0909341SAndroid Build Coastguard Workerfunction msac_decode_bool_neon, export=1
518*c0909341SAndroid Build Coastguard Worker        push            {r4-r10,lr}            // save r4-r10 and the return address
519*c0909341SAndroid Build Coastguard Worker        ldr             r5,  [r0, #RNG]        // r = rng
520*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [r0, #CNT]        // cnt
521*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #48          // 48 bytes reserved but never touched here; presumably mirrors the frame the shared exit path releases -- confirm at L(renorm2)
522*c0909341SAndroid Build Coastguard Worker        ldr             r7,  [r0, #DIF]        // dif
523*c0909341SAndroid Build Coastguard Worker        lsr             r4,  r5,  #8           // r >> 8
524*c0909341SAndroid Build Coastguard Worker        bic             r1,  r1,  #0x3f        // f &= ~63
525*c0909341SAndroid Build Coastguard Worker        mul             r4,  r4,  r1           // (r >> 8) * f
526*c0909341SAndroid Build Coastguard Worker        mov             r2,  #0                // bit = 0 unless the lo case below fires
527*c0909341SAndroid Build Coastguard Worker        lsr             r4,  r4,  #7           // ((r >> 8) * f) >> 7
528*c0909341SAndroid Build Coastguard Worker        add             r4,  r4,  #4           // v
529*c0909341SAndroid Build Coastguard Worker        subs            r8,  r7,  r4, lsl #16  // dif - vw, vw = v << 16; sets the flags used by the IT block
530*c0909341SAndroid Build Coastguard Worker        sub             r5,  r5,  r4           // r - v (no flag update)
531*c0909341SAndroid Build Coastguard Worker        itee            lo                     // lo: dif < vw, hs: dif >= vw
532*c0909341SAndroid Build Coastguard Worker        movlo           r2,  #1                // dif < vw: bit = 1, v and dif stay as they are
533*c0909341SAndroid Build Coastguard Worker        movhs           r4,  r5                // dif >= vw: v = r - v (bit stays 0)
534*c0909341SAndroid Build Coastguard Worker        movhs           r7,  r8                // dif >= vw: dif = dif - vw
535*c0909341SAndroid Build Coastguard Worker
536*c0909341SAndroid Build Coastguard Worker        clz             r5,  r4                // clz(rng)
537*c0909341SAndroid Build Coastguard Worker        eor             r5,  r5,  #16          // d = clz(rng) ^ 16
538*c0909341SAndroid Build Coastguard Worker        mov             lr,  r2                // hand the bit over in lr
539*c0909341SAndroid Build Coastguard Worker        b               L(renorm2)             // shared tail, defined outside this chunk
540*c0909341SAndroid Build Coastguard Workerendfunc
541*c0909341SAndroid Build Coastguard Worker
// msac_decode_bool_adapt_neon
// Decodes one boolean whose probability is read from memory and, when the
// context allows it, adapts that probability and its counter afterwards.
//   In:    r0 = decoder context; RNG, CNT, DIF and ALLOW_UPDATE_CDF are
//          loaded from it (those offsets are defined outside this chunk).
//          r1 = pointer to two 16-bit values: cdf[0] (probability) followed
//          by cdf[1] (adaptation count). Both are fetched with one 32-bit
//          load and split as low/high halfword, which matches the strh
//          stores at [r1] and [r1, #2] only for little-endian data.
//   Bit:   1 when dif < (v << 16), otherwise 0, with
//          v = (((rng >> 8) * (cdf[0] & 0xffc0)) >> 7) + 4.
//   Side effect: if ALLOW_UPDATE_CDF is non-zero, cdf[0] and cdf[1] are
//          rewritten through r1.
//   Exit:  tail-branches to L(renorm2), which is defined outside this chunk;
//          presumably it renormalises, writes the state back, releases the
//          frame and returns the bit -- confirm at that label.
//   State at the branch: r4 = new rng before normalisation, r5 = d,
//          r6 = cnt, r7 = dif, lr = bit, sp = 48 bytes below the eight
//          pushed registers. r2 no longer holds the bit on the update path.
542*c0909341SAndroid Build Coastguard Workerfunction msac_decode_bool_adapt_neon, export=1
543*c0909341SAndroid Build Coastguard Worker        push            {r4-r10,lr}            // save r4-r10 and the return address
544*c0909341SAndroid Build Coastguard Worker        ldr             r9,  [r1]              // cdf[0-1]
545*c0909341SAndroid Build Coastguard Worker        ldr             r5,  [r0, #RNG]        // r = rng
546*c0909341SAndroid Build Coastguard Worker        movw            lr,  #0xffc0           // mask: bits 6..15 of the low halfword
547*c0909341SAndroid Build Coastguard Worker        ldr             r6,  [r0, #CNT]        // cnt
548*c0909341SAndroid Build Coastguard Worker        sub             sp,  sp,  #48          // 48 bytes reserved but never touched here; presumably mirrors the frame the shared exit path releases -- confirm at L(renorm2)
549*c0909341SAndroid Build Coastguard Worker        ldr             r7,  [r0, #DIF]        // dif
550*c0909341SAndroid Build Coastguard Worker        lsr             r4,  r5,  #8           // r >> 8
551*c0909341SAndroid Build Coastguard Worker        and             r2,  r9,  lr           // f = cdf[0] & ~63 (also drops cdf[1] in the high halfword)
552*c0909341SAndroid Build Coastguard Worker        mul             r4,  r4,  r2           // (r >> 8) * f
553*c0909341SAndroid Build Coastguard Worker        mov             r2,  #0                // bit = 0 unless the lo case below fires
554*c0909341SAndroid Build Coastguard Worker        lsr             r4,  r4,  #7           // ((r >> 8) * f) >> 7
555*c0909341SAndroid Build Coastguard Worker        add             r4,  r4,  #4           // v
556*c0909341SAndroid Build Coastguard Worker        subs            r8,  r7,  r4, lsl #16  // dif - vw, vw = v << 16; sets the flags used by the IT block
557*c0909341SAndroid Build Coastguard Worker        sub             r5,  r5,  r4           // r - v (no flag update)
558*c0909341SAndroid Build Coastguard Worker        ldr             r10, [r0, #ALLOW_UPDATE_CDF] // loads leave the flags from subs intact
559*c0909341SAndroid Build Coastguard Worker        itee            lo                     // lo: dif < vw, hs: dif >= vw
560*c0909341SAndroid Build Coastguard Worker        movlo           r2,  #1                // dif < vw: bit = 1, v and dif stay as they are
561*c0909341SAndroid Build Coastguard Worker        movhs           r4,  r5                // dif >= vw: v = r - v (bit stays 0)
562*c0909341SAndroid Build Coastguard Worker        movhs           r7,  r8                // dif >= vw: dif = dif - vw
563*c0909341SAndroid Build Coastguard Worker
564*c0909341SAndroid Build Coastguard Worker        cmp             r10, #0                // adaptation enabled? flags are consumed by the beq below
565*c0909341SAndroid Build Coastguard Worker        clz             r5,  r4                // clz(rng)
566*c0909341SAndroid Build Coastguard Worker        eor             r5,  r5,  #16          // d = clz(rng) ^ 16
567*c0909341SAndroid Build Coastguard Worker        mov             lr,  r2                // hand the bit over in lr; frees r2 for the update below
568*c0909341SAndroid Build Coastguard Worker
569*c0909341SAndroid Build Coastguard Worker        beq             L(renorm2)             // ALLOW_UPDATE_CDF == 0: skip the adaptation
570*c0909341SAndroid Build Coastguard Worker
571*c0909341SAndroid Build Coastguard Worker        lsr             r2,  r9,  #16          // count = cdf[1]
572*c0909341SAndroid Build Coastguard Worker        uxth            r9,  r9                // cdf[0]
573*c0909341SAndroid Build Coastguard Worker
574*c0909341SAndroid Build Coastguard Worker        sub             r3,  r2,  r2,  lsr #5  // count - (count >= 32); exact only while count < 64
575*c0909341SAndroid Build Coastguard Worker        lsr             r2,  r2,  #4           // count >> 4
576*c0909341SAndroid Build Coastguard Worker        add             r10, r3,  #1           // count + (count < 32)
577*c0909341SAndroid Build Coastguard Worker        add             r2,  r2,  #4           // rate = (count >> 4) | 4 (add and or agree while count >> 4 <= 3)
578*c0909341SAndroid Build Coastguard Worker
579*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  lr           // cdf[0] -= bit
580*c0909341SAndroid Build Coastguard Worker        sub             r3,  r9,  lr,  lsl #15 // {cdf[0], cdf[0] - 32769}: bit = 0 / bit = 1, in terms of the original cdf[0]
581*c0909341SAndroid Build Coastguard Worker        asr             r3,  r3,  r2           // {cdf[0], cdf[0] - 32769} >> rate (arithmetic, keeps the sign)
582*c0909341SAndroid Build Coastguard Worker        sub             r9,  r9,  r3           // cdf[0] = (cdf[0] - bit) - shifted delta
583*c0909341SAndroid Build Coastguard Worker
584*c0909341SAndroid Build Coastguard Worker        strh            r9,  [r1]              // store updated cdf[0]
585*c0909341SAndroid Build Coastguard Worker        strh            r10, [r1, #2]          // store updated count in cdf[1]
586*c0909341SAndroid Build Coastguard Worker
587*c0909341SAndroid Build Coastguard Worker        b               L(renorm2)             // shared tail, defined outside this chunk
588*c0909341SAndroid Build Coastguard Workerendfunc
589