xref: /aosp_15_r20/external/libdav1d/src/loongarch/msac.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2023, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2023, Loongson Technology Corporation Limited
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker */
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "loongson_asm.S"
29*c0909341SAndroid Build Coastguard Worker
// min_prob: 16 halfwords descending from 60 to 0 in steps of 4 (32 bytes).
// decode_symbol_adapt loads a 16-byte window starting at
// (min_prob + 30 - 2 * a2) and adds it lane-wise to the scaled cdf product,
// so lane i of the window holds 4 * (a2 - i) while that index stays inside
// the table.
// NOTE(review): for small a2 (and for the second load of the 16-lane variant)
// the window extends past the 32 bytes defined here; presumably the data that
// follows is readable and those lanes are don't-care -- confirm against the
// layout produced by the const/endconst macros.
30*c0909341SAndroid Build Coastguard Workerconst min_prob
31*c0909341SAndroid Build Coastguard Worker  .short 60, 56, 52, 48, 44, 40, 36, 32, 28, 24, 20, 16, 12, 8, 4, 0
32*c0909341SAndroid Build Coastguard Workerendconst
33*c0909341SAndroid Build Coastguard Worker
// ph_0xff00: eight halfwords, each 0xff00 (16 bytes).
// NOTE(review): no instruction in the portion of the file visible here
// references this symbol by name (the rng masking in decode_symbol_adapt is
// done with a shift pair instead). It is defined directly after min_prob;
// verify whether anything depends on that placement before removing it.
34*c0909341SAndroid Build Coastguard Workerconst ph_0xff00
35*c0909341SAndroid Build Coastguard Worker.rept 8
36*c0909341SAndroid Build Coastguard Worker  .short 0xff00
37*c0909341SAndroid Build Coastguard Worker.endr
38*c0909341SAndroid Build Coastguard Workerendconst
39*c0909341SAndroid Build Coastguard Worker
// decode_symbol_adapt w
//   Body shared by the msac_decode_symbol_adapt{4,8,16}_lsx entry points.
//   w (4, 8 or 16) selects how many halfword lanes are processed/stored.
//
//   In:   a0 = context pointer; fields are accessed at byte offsets
//              0 (buf_pos), 8 (buf_end), 16 (dif), 24 (rng), 28 (cnt),
//              32 (allow_update_cdf), as named by the inline comments
//         a1 = pointer to the halfword cdf array; 16 bytes are always loaded
//              (32 when w == 16)
//         a2 = halfword index of the adaptation counter inside cdf; also
//              selects the min_prob window
//              (presumably n_symbols -- confirm against the C prototype)
//   Out:  a0 = index of the first lane whose threshold v is <= c
//   Uses: 48 bytes of stack scratch, t0-t8, a7,
//         vr0-vr11, vr13, vr15, vr16, vr18, vr20, vr21
//   The macro contains no return instruction; presumably endfunc supplies
//   it -- confirm in loongson_asm.S.
40*c0909341SAndroid Build Coastguard Worker.macro decode_symbol_adapt w
41*c0909341SAndroid Build Coastguard Worker    addi.d          sp,      sp,     -48
42*c0909341SAndroid Build Coastguard Worker    vldrepl.h       vr0,     a0,      24   //rng
    // Seed the scratch area with rng: the threshold array is stored from sp+2
    // below, so the halfword just under v[0] (read as "u" when ret == 0) is rng.
43*c0909341SAndroid Build Coastguard Worker    fst.s           f0,      sp,      0    //val==0
44*c0909341SAndroid Build Coastguard Worker    vld             vr1,     a1,      0    //cdf
45*c0909341SAndroid Build Coastguard Worker.if \w == 16
46*c0909341SAndroid Build Coastguard Worker    vld             vr11,    a1,      16
47*c0909341SAndroid Build Coastguard Worker.endif
48*c0909341SAndroid Build Coastguard Worker    vldrepl.d       vr2,     a0,      16   //dif
49*c0909341SAndroid Build Coastguard Worker    ld.w            t1,      a0,      32   //allow_update_cdf
    // t2 = min_prob + 30 - 2 * a2: start of the min_prob window for this call.
50*c0909341SAndroid Build Coastguard Worker    la.local        t2,      min_prob
51*c0909341SAndroid Build Coastguard Worker    addi.d          t2,      t2,      30
52*c0909341SAndroid Build Coastguard Worker    slli.w          t3,      a2,      1
53*c0909341SAndroid Build Coastguard Worker    sub.d           t2,      t2,      t3
54*c0909341SAndroid Build Coastguard Worker    vld             vr3,     t2,      0    //min_prob
55*c0909341SAndroid Build Coastguard Worker.if \w == 16
56*c0909341SAndroid Build Coastguard Worker    vld             vr13,    t2,      16
57*c0909341SAndroid Build Coastguard Worker.endif
    // vr4 = rng with its low byte cleared; vr5/vr15 = (cdf >> 6) << 7.
    // The high half of their 16x16 product equals ((rng >> 8) * (cdf >> 6)) >> 1.
58*c0909341SAndroid Build Coastguard Worker    vsrli.h         vr4,     vr0,     8    //r = s->rng >> 8
59*c0909341SAndroid Build Coastguard Worker    vslli.h         vr4,     vr4,     8    //r << 8
60*c0909341SAndroid Build Coastguard Worker    vsrli.h         vr5,     vr1,     6
61*c0909341SAndroid Build Coastguard Worker    vslli.h         vr5,     vr5,     7
62*c0909341SAndroid Build Coastguard Worker.if \w == 16
63*c0909341SAndroid Build Coastguard Worker    vsrli.h         vr15,    vr11,    6
64*c0909341SAndroid Build Coastguard Worker    vslli.h         vr15,    vr15,    7
65*c0909341SAndroid Build Coastguard Worker.endif
66*c0909341SAndroid Build Coastguard Worker    vmuh.hu         vr5,     vr4,     vr5
67*c0909341SAndroid Build Coastguard Worker    vadd.h          vr5,     vr5,     vr3  //v
68*c0909341SAndroid Build Coastguard Worker.if \w == 16
69*c0909341SAndroid Build Coastguard Worker    vmuh.hu         vr15,    vr4,     vr15
70*c0909341SAndroid Build Coastguard Worker    vadd.h          vr15,    vr15,    vr13
71*c0909341SAndroid Build Coastguard Worker.endif
    // t8 = sp + 2 is the base of the stored threshold array v[]; it is reused
    // after .renorm to fetch v[ret] and v[ret - 1].
72*c0909341SAndroid Build Coastguard Worker    addi.d          t8,      sp,      2
73*c0909341SAndroid Build Coastguard Worker    vst             vr5,     t8,      0    //store v
74*c0909341SAndroid Build Coastguard Worker.if \w == 16
75*c0909341SAndroid Build Coastguard Worker    vst             vr15,    t8,      16
76*c0909341SAndroid Build Coastguard Worker.endif
    // c = halfword 3 of dif (its top 16 bits) broadcast to every lane;
    // vr6/vr16 become all-ones in each lane where v <= c (unsigned).
77*c0909341SAndroid Build Coastguard Worker    vreplvei.h      vr20,    vr2,     3    //c
78*c0909341SAndroid Build Coastguard Worker    vsle.hu         vr6,     vr5,     vr20
79*c0909341SAndroid Build Coastguard Worker.if \w == 16
80*c0909341SAndroid Build Coastguard Worker    vsle.hu         vr16,    vr15,    vr20
81*c0909341SAndroid Build Coastguard Worker    vpickev.b       vr21,    vr16,    vr6
82*c0909341SAndroid Build Coastguard Worker.endif
    // Collapse the lane mask into a bitmask in vr10 (one bit per lane); the
    // 16-lane case first narrows both masks to bytes via vpickev.b above.
83*c0909341SAndroid Build Coastguard Worker.if \w <= 8
84*c0909341SAndroid Build Coastguard Worker    vmskltz.h       vr10,    vr6
85*c0909341SAndroid Build Coastguard Worker.else
86*c0909341SAndroid Build Coastguard Worker    vmskltz.b       vr10,    vr21
87*c0909341SAndroid Build Coastguard Worker.endif
    // Skip the cdf adaptation entirely when allow_update_cdf == 0.
88*c0909341SAndroid Build Coastguard Worker    beqz            t1,      .renorm\()\w
89*c0909341SAndroid Build Coastguard Worker
90*c0909341SAndroid Build Coastguard Worker    // update_cdf
    // t1 = &cdf[a2] (the counter halfword). rate = (count >> 4) + 5 for
    // w == 16, otherwise (count >> 4) + 4 + (a2 > 2).
91*c0909341SAndroid Build Coastguard Worker    alsl.d          t1,      a2,      a1,   1
92*c0909341SAndroid Build Coastguard Worker    ld.h            t2,      t1,      0    //count
93*c0909341SAndroid Build Coastguard Worker    srli.w          t3,      t2,      4    //count >> 4
94*c0909341SAndroid Build Coastguard Worker.if \w == 16
95*c0909341SAndroid Build Coastguard Worker    addi.w          t3,      t3,      5    //rate
96*c0909341SAndroid Build Coastguard Worker.else
97*c0909341SAndroid Build Coastguard Worker    addi.w          t3,      t3,      4
98*c0909341SAndroid Build Coastguard Worker    li.w            t5,      2
99*c0909341SAndroid Build Coastguard Worker    sltu            t5,      t5,      a2
100*c0909341SAndroid Build Coastguard Worker    add.w           t3,      t3,      t5   //rate
101*c0909341SAndroid Build Coastguard Worker.endif
102*c0909341SAndroid Build Coastguard Worker    sltui           t5,      t2,      32
103*c0909341SAndroid Build Coastguard Worker    add.w           t2,      t2,      t5   //count + (count < 32)
104*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.h    vr9,     t3
    // vr7 = all ones. The rounded average of the lane mask with 0xffff gives
    // 0xffff where the mask is set and 0x8000 where it is clear. The new cdf is
    // (cdf - mask) + ((that target - cdf) >> rate), with an arithmetic shift.
105*c0909341SAndroid Build Coastguard Worker    vseq.h          vr7,     vr7,     vr7
106*c0909341SAndroid Build Coastguard Worker    vavgr.hu        vr5,     vr6,     vr7  //i >= val ? -1 : 32768
107*c0909341SAndroid Build Coastguard Worker    vsub.h          vr5,     vr5,     vr1
108*c0909341SAndroid Build Coastguard Worker    vsub.h          vr8,     vr1,     vr6
109*c0909341SAndroid Build Coastguard Worker.if \w == 16
110*c0909341SAndroid Build Coastguard Worker    vavgr.hu        vr15,    vr16,    vr7
111*c0909341SAndroid Build Coastguard Worker    vsub.h          vr15,    vr15,    vr11
112*c0909341SAndroid Build Coastguard Worker    vsub.h          vr18,    vr11,    vr16
113*c0909341SAndroid Build Coastguard Worker.endif
114*c0909341SAndroid Build Coastguard Worker    vsra.h          vr5,     vr5,     vr9
115*c0909341SAndroid Build Coastguard Worker    vadd.h          vr8,     vr8,     vr5
    // Write back only as many cdf bytes as the variant owns: 8 for w == 4,
    // 16 for w == 8, 32 for w == 16.
116*c0909341SAndroid Build Coastguard Worker.if \w == 4
117*c0909341SAndroid Build Coastguard Worker    fst.d           f8,      a1,      0
118*c0909341SAndroid Build Coastguard Worker.else
119*c0909341SAndroid Build Coastguard Worker    vst             vr8,     a1,      0
120*c0909341SAndroid Build Coastguard Worker.endif
121*c0909341SAndroid Build Coastguard Worker.if \w == 16
122*c0909341SAndroid Build Coastguard Worker    vsra.h          vr15,    vr15,    vr9
123*c0909341SAndroid Build Coastguard Worker    vadd.h          vr18,    vr18,    vr15
124*c0909341SAndroid Build Coastguard Worker    vst             vr18,    a1,      16
125*c0909341SAndroid Build Coastguard Worker.endif
    // The counter is stored last, after the vector stores to the cdf array.
126*c0909341SAndroid Build Coastguard Worker    st.h            t2,      t1,      0
127*c0909341SAndroid Build Coastguard Worker
128*c0909341SAndroid Build Coastguard Worker.renorm\()\w:
    // a7 = index of the lowest set bit of the lane bitmask. v = v[a7] and
    // u = the halfword just below it in the scratch area (rng when a7 == 0).
129*c0909341SAndroid Build Coastguard Worker    vpickve2gr.h    t3,      vr10,    0
130*c0909341SAndroid Build Coastguard Worker    ctz.w           a7,      t3            // ret
131*c0909341SAndroid Build Coastguard Worker    alsl.d          t3,      a7,      t8,      1
132*c0909341SAndroid Build Coastguard Worker    ld.hu           t4,      t3,      0    // v
133*c0909341SAndroid Build Coastguard Worker    ld.hu           t5,      t3,      -2   // u
134*c0909341SAndroid Build Coastguard Worker    sub.w           t5,      t5,      t4   // rng
135*c0909341SAndroid Build Coastguard Worker    slli.d          t4,      t4,      48
136*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d    t6,      vr2,     0
137*c0909341SAndroid Build Coastguard Worker    sub.d           t6,      t6,      t4   // dif
    // d = clz.w(rng) ^ 16; rng and dif are both shifted left by d.
138*c0909341SAndroid Build Coastguard Worker    clz.w           t4,      t5            // d
139*c0909341SAndroid Build Coastguard Worker    xori            t4,      t4,      16   // d
140*c0909341SAndroid Build Coastguard Worker    sll.d           t6,      t6,      t4
141*c0909341SAndroid Build Coastguard Worker    ld.w            t0,      a0,      28   //cnt
142*c0909341SAndroid Build Coastguard Worker    sll.w           t5,      t5,      t4
143*c0909341SAndroid Build Coastguard Worker    sub.w           t7,      t0,      t4   // cnt-d
144*c0909341SAndroid Build Coastguard Worker    st.w            t5,      a0,      24   // store rng
    // No refill needed while the old cnt is >= d (unsigned compare).
145*c0909341SAndroid Build Coastguard Worker    bgeu            t0,      t4,      9f
146*c0909341SAndroid Build Coastguard Worker
147*c0909341SAndroid Build Coastguard Worker    // refill
    // Fast path: taken when buf_pos + 8 <= buf_end, so a full 8-byte load from
    // buf_pos stays inside the buffer. Otherwise go to 2: (near end of buffer).
148*c0909341SAndroid Build Coastguard Worker    ld.d            t0,      a0,      0    // buf_pos
149*c0909341SAndroid Build Coastguard Worker    ld.d            t1,      a0,      8    // buf_end
150*c0909341SAndroid Build Coastguard Worker    addi.d          t2,      t0,      8
151*c0909341SAndroid Build Coastguard Worker    bltu            t1,      t2,      2f
152*c0909341SAndroid Build Coastguard Worker
    // Loaded bytes are inverted and byte-swapped, then shifted right by the low
    // 6 bits of (cnt - 48). num_bytes_read = (48 - cnt) >> 3.
153*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t0,      0    // next_bits
154*c0909341SAndroid Build Coastguard Worker    addi.w          t1,      t7,      -48  // shift_bits = cnt + 16 (- 64)
155*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
156*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      zero,    t1
157*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3            // next_bits = bswap(next_bits)
158*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t2,      3    // num_bytes_read
159*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t1   // next_bits >>= (shift_bits & 63)
160*c0909341SAndroid Build Coastguard Worker    b               3f
161*c0909341SAndroid Build Coastguard Worker1:
    // Reached from 2: when buf_pos >= buf_end (nothing left to read). buf_pos
    // and cnt are left as they are; only dif is OR-ed with the fill pattern.
    // NOTE(review): t3 is set to (cnt - 48) and then shifted right by its own
    // low 6 bits; whether the low-order bits of the result are all ones for
    // every reachable cnt was not verified here.
162*c0909341SAndroid Build Coastguard Worker    addi.w          t3,      t7,      -48
163*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t3   // pad with ones
164*c0909341SAndroid Build Coastguard Worker    b               4f
165*c0909341SAndroid Build Coastguard Worker2:
    // Fewer than 8 bytes remain: load the last 8 bytes ending at buf_end and
    // shift out the ones that precede buf_pos, then process as on the fast
    // path. num_bytes_read = min(num_bytes_left, (48 - cnt) >> 3), selected
    // with the maskeqz/masknez pair.
166*c0909341SAndroid Build Coastguard Worker    bgeu            t0,      t1,      1b
167*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t1,      -8   // next_bits
168*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      t2,      t1
169*c0909341SAndroid Build Coastguard Worker    sub.w           t1,      t1,      t0   // num_bytes_left
170*c0909341SAndroid Build Coastguard Worker    slli.w          t2,      t2,      3
171*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
172*c0909341SAndroid Build Coastguard Worker    addi.w          t2,      t7,      -48
173*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
174*c0909341SAndroid Build Coastguard Worker    sub.w           t4,      zero,    t2
175*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3
176*c0909341SAndroid Build Coastguard Worker    srli.w          t4,      t4,      3
177*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
178*c0909341SAndroid Build Coastguard Worker    sltu            t2,      t1,      t4
179*c0909341SAndroid Build Coastguard Worker    maskeqz         t1,      t1,      t2
180*c0909341SAndroid Build Coastguard Worker    masknez         t2,      t4,      t2
181*c0909341SAndroid Build Coastguard Worker    or              t2,      t2,      t1   // num_bytes_read
182*c0909341SAndroid Build Coastguard Worker3:
    // Advance buf_pos by the bytes consumed and credit cnt with 8 bits each.
183*c0909341SAndroid Build Coastguard Worker    slli.w          t1,      t2,      3
184*c0909341SAndroid Build Coastguard Worker    add.d           t0,      t0,      t2
185*c0909341SAndroid Build Coastguard Worker    add.w           t7,      t7,      t1   // cnt += num_bits_read
186*c0909341SAndroid Build Coastguard Worker    st.d            t0,      a0,      0
187*c0909341SAndroid Build Coastguard Worker4:
188*c0909341SAndroid Build Coastguard Worker    or              t6,      t6,      t3   // dif |= next_bits
189*c0909341SAndroid Build Coastguard Worker9:
    // Common exit: write back cnt and dif, return the symbol index in a0 and
    // release the 48-byte scratch area.
190*c0909341SAndroid Build Coastguard Worker    st.w            t7,      a0,      28   // store cnt
191*c0909341SAndroid Build Coastguard Worker    st.d            t6,      a0,      16   // store dif
192*c0909341SAndroid Build Coastguard Worker    move            a0,      a7
193*c0909341SAndroid Build Coastguard Worker    addi.d          sp,      sp,      48
194*c0909341SAndroid Build Coastguard Worker.endm
195*c0909341SAndroid Build Coastguard Worker
// msac_decode_symbol_adapt4_lsx: entry point whose whole body is the
// decode_symbol_adapt macro expanded with w = 4 (the cdf write-back stores
// 8 bytes). Arguments arrive in a0/a1/a2 and the result is left in a0, as
// used by the macro body.
196*c0909341SAndroid Build Coastguard Workerfunction msac_decode_symbol_adapt4_lsx
197*c0909341SAndroid Build Coastguard Worker    decode_symbol_adapt 4
198*c0909341SAndroid Build Coastguard Workerendfunc
199*c0909341SAndroid Build Coastguard Worker
// msac_decode_symbol_adapt8_lsx: entry point whose whole body is the
// decode_symbol_adapt macro expanded with w = 8 (the cdf write-back stores
// 16 bytes). Arguments arrive in a0/a1/a2 and the result is left in a0, as
// used by the macro body.
200*c0909341SAndroid Build Coastguard Workerfunction msac_decode_symbol_adapt8_lsx
201*c0909341SAndroid Build Coastguard Worker    decode_symbol_adapt 8
202*c0909341SAndroid Build Coastguard Workerendfunc
203*c0909341SAndroid Build Coastguard Worker
// msac_decode_symbol_adapt16_lsx: entry point whose whole body is the
// decode_symbol_adapt macro expanded with w = 16 (two vectors are loaded,
// compared and written back, 32 bytes of cdf in total). Arguments arrive in
// a0/a1/a2 and the result is left in a0, as used by the macro body.
204*c0909341SAndroid Build Coastguard Workerfunction msac_decode_symbol_adapt16_lsx
205*c0909341SAndroid Build Coastguard Worker    decode_symbol_adapt 16
206*c0909341SAndroid Build Coastguard Workerendfunc
207*c0909341SAndroid Build Coastguard Worker
// msac_decode_bool_lsx
//   Decodes one boolean using the probability passed in a1.
//   In:   a0 = context pointer; fields are accessed at byte offsets
//              0 (buf_pos), 8 (buf_end), 16 (dif), 24 (rng), 28 (cnt),
//              as named by the inline comments
//         a1 = probability value; only (a1 >> 6) is used
//   Out:  a0 = 1 if dif < (v << 48), otherwise 0
//   Uses: a1, a5, t0-t8; no stack.
//   No explicit return instruction appears here; presumably endfunc emits
//   it -- confirm in loongson_asm.S.
208*c0909341SAndroid Build Coastguard Workerfunction msac_decode_bool_lsx
209*c0909341SAndroid Build Coastguard Worker    ld.w            t0,      a0,      24   // rng
210*c0909341SAndroid Build Coastguard Worker    srli.w          a1,      a1,      6
211*c0909341SAndroid Build Coastguard Worker    ld.d            t1,      a0,      16   // dif
    // v = (((rng >> 8) * (a1 >> 6)) >> 1) + 4
212*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t0,      8    // r >> 8
213*c0909341SAndroid Build Coastguard Worker    mul.w           t2,      t2,      a1
214*c0909341SAndroid Build Coastguard Worker    ld.w            a5,      a0,      28   // cnt
215*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t2,      1
216*c0909341SAndroid Build Coastguard Worker    addi.w          t2,      t2,      4    // v
    // t8 (the return value) = dif < vw; t4 is then flipped to dif >= vw and
    // used as the select condition for the two maskeqz instructions.
217*c0909341SAndroid Build Coastguard Worker    slli.d          t3,      t2,      48   // vw
218*c0909341SAndroid Build Coastguard Worker    sltu            t4,      t1,      t3
219*c0909341SAndroid Build Coastguard Worker    move            t8,      t4            // ret
220*c0909341SAndroid Build Coastguard Worker    xori            t4,      t4,      1
    // When dif >= vw: dif -= vw and rng = rng - v; otherwise dif is unchanged
    // and rng = v.
221*c0909341SAndroid Build Coastguard Worker    maskeqz         t6,      t3,      t4   // if (ret) vw
222*c0909341SAndroid Build Coastguard Worker    sub.d           t6,      t1,      t6   // dif
223*c0909341SAndroid Build Coastguard Worker    slli.w          t5,      t2,      1
224*c0909341SAndroid Build Coastguard Worker    sub.w           t5,      t0,      t5   // r - 2v
225*c0909341SAndroid Build Coastguard Worker    maskeqz         t7,      t5,      t4   // if (ret) r - 2v
226*c0909341SAndroid Build Coastguard Worker    add.w           t5,      t2,      t7   // v(rng)
227*c0909341SAndroid Build Coastguard Worker
228*c0909341SAndroid Build Coastguard Worker    // renorm
    // d = clz.w(rng) ^ 16; rng and dif are both shifted left by d.
229*c0909341SAndroid Build Coastguard Worker    clz.w           t4,      t5            // d
230*c0909341SAndroid Build Coastguard Worker    xori            t4,      t4,      16   // d
231*c0909341SAndroid Build Coastguard Worker    sll.d           t6,      t6,      t4
232*c0909341SAndroid Build Coastguard Worker    sll.w           t5,      t5,      t4
233*c0909341SAndroid Build Coastguard Worker    sub.w           t7,      a5,      t4   // cnt-d
234*c0909341SAndroid Build Coastguard Worker    st.w            t5,      a0,      24   // store rng
    // No refill needed while the old cnt is >= d (unsigned compare).
235*c0909341SAndroid Build Coastguard Worker    bgeu            a5,      t4,      9f
236*c0909341SAndroid Build Coastguard Worker
237*c0909341SAndroid Build Coastguard Worker    // refill
    // Fast path: taken when buf_pos + 8 <= buf_end, so a full 8-byte load from
    // buf_pos stays inside the buffer. Otherwise go to 2: (near end of buffer).
238*c0909341SAndroid Build Coastguard Worker    ld.d            t0,      a0,      0    // buf_pos
239*c0909341SAndroid Build Coastguard Worker    ld.d            t1,      a0,      8    // buf_end
240*c0909341SAndroid Build Coastguard Worker    addi.d          t2,      t0,      8
241*c0909341SAndroid Build Coastguard Worker    bltu            t1,      t2,      2f
242*c0909341SAndroid Build Coastguard Worker
    // Loaded bytes are inverted and byte-swapped, then shifted right by the low
    // 6 bits of (cnt - 48). num_bytes_read = (48 - cnt) >> 3.
243*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t0,      0    // next_bits
244*c0909341SAndroid Build Coastguard Worker    addi.w          t1,      t7,      -48  // shift_bits = cnt + 16 (- 64)
245*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
246*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      zero,    t1
247*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3            // next_bits = bswap(next_bits)
248*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t2,      3    // num_bytes_read
249*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t1   // next_bits >>= (shift_bits & 63)
250*c0909341SAndroid Build Coastguard Worker    b               3f
251*c0909341SAndroid Build Coastguard Worker1:
    // Reached from 2: when buf_pos >= buf_end (nothing left to read). buf_pos
    // and cnt are left as they are; only dif is OR-ed with the fill pattern.
    // NOTE(review): t3 is set to (cnt - 48) and then shifted right by its own
    // low 6 bits; whether the low-order bits of the result are all ones for
    // every reachable cnt was not verified here.
252*c0909341SAndroid Build Coastguard Worker    addi.w          t3,      t7,      -48
253*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t3   // pad with ones
254*c0909341SAndroid Build Coastguard Worker    b               4f
255*c0909341SAndroid Build Coastguard Worker2:
    // Fewer than 8 bytes remain: load the last 8 bytes ending at buf_end and
    // shift out the ones that precede buf_pos, then process as on the fast
    // path. num_bytes_read = min(num_bytes_left, (48 - cnt) >> 3), selected
    // with the maskeqz/masknez pair.
256*c0909341SAndroid Build Coastguard Worker    bgeu            t0,      t1,      1b
257*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t1,      -8   // next_bits
258*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      t2,      t1
259*c0909341SAndroid Build Coastguard Worker    sub.w           t1,      t1,      t0   // num_bytes_left
260*c0909341SAndroid Build Coastguard Worker    slli.w          t2,      t2,      3
261*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
262*c0909341SAndroid Build Coastguard Worker    addi.w          t2,      t7,      -48
263*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
264*c0909341SAndroid Build Coastguard Worker    sub.w           t4,      zero,    t2
265*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3
266*c0909341SAndroid Build Coastguard Worker    srli.w          t4,      t4,      3
267*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
268*c0909341SAndroid Build Coastguard Worker    sltu            t2,      t1,      t4
269*c0909341SAndroid Build Coastguard Worker    maskeqz         t1,      t1,      t2
270*c0909341SAndroid Build Coastguard Worker    masknez         t2,      t4,      t2
271*c0909341SAndroid Build Coastguard Worker    or              t2,      t2,      t1   // num_bytes_read
272*c0909341SAndroid Build Coastguard Worker3:
    // Advance buf_pos by the bytes consumed and credit cnt with 8 bits each.
273*c0909341SAndroid Build Coastguard Worker    slli.w          t1,      t2,      3
274*c0909341SAndroid Build Coastguard Worker    add.d           t0,      t0,      t2
275*c0909341SAndroid Build Coastguard Worker    add.w           t7,      t7,      t1   // cnt += num_bits_read
276*c0909341SAndroid Build Coastguard Worker    st.d            t0,      a0,      0
277*c0909341SAndroid Build Coastguard Worker4:
278*c0909341SAndroid Build Coastguard Worker    or              t6,      t6,      t3   // dif |= next_bits
279*c0909341SAndroid Build Coastguard Worker9:
    // Common exit: write back cnt and dif, return the decoded bit in a0.
280*c0909341SAndroid Build Coastguard Worker    st.w            t7,      a0,      28   // store cnt
281*c0909341SAndroid Build Coastguard Worker    st.d            t6,      a0,      16   // store dif
282*c0909341SAndroid Build Coastguard Worker    move            a0,      t8
283*c0909341SAndroid Build Coastguard Workerendfunc
284*c0909341SAndroid Build Coastguard Worker
// msac_decode_bool_equi_lsx
//   Decodes one boolean without a probability argument: the split point is
//   derived from rng alone.
//   In:   a0 = context pointer; fields are accessed at byte offsets
//              0 (buf_pos), 8 (buf_end), 16 (dif), 24 (rng), 28 (cnt),
//              as named by the inline comments
//   Out:  a0 = 1 if dif < (v << 48), otherwise 0
//   Uses: a5, t0-t8; no stack.
//   No explicit return instruction appears here; presumably endfunc emits
//   it -- confirm in loongson_asm.S.
285*c0909341SAndroid Build Coastguard Workerfunction msac_decode_bool_equi_lsx
286*c0909341SAndroid Build Coastguard Worker    ld.w            t0,      a0,      24   // rng
287*c0909341SAndroid Build Coastguard Worker    ld.d            t1,      a0,      16   // dif
288*c0909341SAndroid Build Coastguard Worker    ld.w            a5,      a0,      28   // cnt
    // v = ((rng >> 8) << 7) + 4
289*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t0,      8    // r >> 8
290*c0909341SAndroid Build Coastguard Worker    slli.w          t2,      t2,      7
291*c0909341SAndroid Build Coastguard Worker    addi.w          t2,      t2,      4    // v
292*c0909341SAndroid Build Coastguard Worker
    // t8 (the return value) = dif < vw; t4 is then flipped to dif >= vw and
    // used as the select condition for the two maskeqz instructions.
293*c0909341SAndroid Build Coastguard Worker    slli.d          t3,      t2,      48   // vw
294*c0909341SAndroid Build Coastguard Worker    sltu            t4,      t1,      t3
295*c0909341SAndroid Build Coastguard Worker    move            t8,      t4            // ret
296*c0909341SAndroid Build Coastguard Worker    xori            t4,      t4,      1
    // When dif >= vw: dif -= vw and rng = rng - v; otherwise dif is unchanged
    // and rng = v.
297*c0909341SAndroid Build Coastguard Worker    maskeqz         t6,      t3,      t4   // if (ret) vw
298*c0909341SAndroid Build Coastguard Worker    sub.d           t6,      t1,      t6   // dif
299*c0909341SAndroid Build Coastguard Worker    slli.w          t5,      t2,      1
300*c0909341SAndroid Build Coastguard Worker    sub.w           t5,      t0,      t5   // r - 2v
301*c0909341SAndroid Build Coastguard Worker    maskeqz         t7,      t5,      t4   // if (ret) r - 2v
302*c0909341SAndroid Build Coastguard Worker    add.w           t5,      t2,      t7   // v(rng)
303*c0909341SAndroid Build Coastguard Worker
304*c0909341SAndroid Build Coastguard Worker    // renorm
    // d = clz.w(rng) ^ 16; rng and dif are both shifted left by d.
305*c0909341SAndroid Build Coastguard Worker    clz.w           t4,      t5            // d
306*c0909341SAndroid Build Coastguard Worker    xori            t4,      t4,      16   // d
307*c0909341SAndroid Build Coastguard Worker    sll.d           t6,      t6,      t4
308*c0909341SAndroid Build Coastguard Worker    sll.w           t5,      t5,      t4
309*c0909341SAndroid Build Coastguard Worker    sub.w           t7,      a5,      t4   // cnt-d
310*c0909341SAndroid Build Coastguard Worker    st.w            t5,      a0,      24   // store rng
    // No refill needed while the old cnt is >= d (unsigned compare).
311*c0909341SAndroid Build Coastguard Worker    bgeu            a5,      t4,      9f
312*c0909341SAndroid Build Coastguard Worker
313*c0909341SAndroid Build Coastguard Worker    // refill
    // Fast path: taken when buf_pos + 8 <= buf_end, so a full 8-byte load from
    // buf_pos stays inside the buffer. Otherwise go to 2: (near end of buffer).
314*c0909341SAndroid Build Coastguard Worker    ld.d            t0,      a0,      0    // buf_pos
315*c0909341SAndroid Build Coastguard Worker    ld.d            t1,      a0,      8    // buf_end
316*c0909341SAndroid Build Coastguard Worker    addi.d          t2,      t0,      8
317*c0909341SAndroid Build Coastguard Worker    bltu            t1,      t2,      2f
318*c0909341SAndroid Build Coastguard Worker
    // Loaded bytes are inverted and byte-swapped, then shifted right by the low
    // 6 bits of (cnt - 48). num_bytes_read = (48 - cnt) >> 3.
319*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t0,      0    // next_bits
320*c0909341SAndroid Build Coastguard Worker    addi.w          t1,      t7,      -48  // shift_bits = cnt + 16 (- 64)
321*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
322*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      zero,    t1
323*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3            // next_bits = bswap(next_bits)
324*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t2,      3    // num_bytes_read
325*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t1   // next_bits >>= (shift_bits & 63)
326*c0909341SAndroid Build Coastguard Worker    b               3f
327*c0909341SAndroid Build Coastguard Worker1:
    // Reached from 2: when buf_pos >= buf_end (nothing left to read). buf_pos
    // and cnt are left as they are; only dif is OR-ed with the fill pattern.
    // NOTE(review): t3 is set to (cnt - 48) and then shifted right by its own
    // low 6 bits; whether the low-order bits of the result are all ones for
    // every reachable cnt was not verified here.
328*c0909341SAndroid Build Coastguard Worker    addi.w          t3,      t7,      -48
329*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t3   // pad with ones
330*c0909341SAndroid Build Coastguard Worker    b               4f
331*c0909341SAndroid Build Coastguard Worker2:
    // Fewer than 8 bytes remain: load the last 8 bytes ending at buf_end and
    // shift out the ones that precede buf_pos, then process as on the fast
    // path. num_bytes_read = min(num_bytes_left, (48 - cnt) >> 3), selected
    // with the maskeqz/masknez pair.
332*c0909341SAndroid Build Coastguard Worker    bgeu            t0,      t1,      1b
333*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t1,      -8   // next_bits
334*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      t2,      t1
335*c0909341SAndroid Build Coastguard Worker    sub.w           t1,      t1,      t0   // num_bytes_left
336*c0909341SAndroid Build Coastguard Worker    slli.w          t2,      t2,      3
337*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
338*c0909341SAndroid Build Coastguard Worker    addi.w          t2,      t7,      -48
339*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
340*c0909341SAndroid Build Coastguard Worker    sub.w           t4,      zero,    t2
341*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3
342*c0909341SAndroid Build Coastguard Worker    srli.w          t4,      t4,      3
343*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
344*c0909341SAndroid Build Coastguard Worker    sltu            t2,      t1,      t4
345*c0909341SAndroid Build Coastguard Worker    maskeqz         t1,      t1,      t2
346*c0909341SAndroid Build Coastguard Worker    masknez         t2,      t4,      t2
347*c0909341SAndroid Build Coastguard Worker    or              t2,      t2,      t1   // num_bytes_read
348*c0909341SAndroid Build Coastguard Worker3:
    // Advance buf_pos by the bytes consumed and credit cnt with 8 bits each.
349*c0909341SAndroid Build Coastguard Worker    slli.w          t1,      t2,      3
350*c0909341SAndroid Build Coastguard Worker    add.d           t0,      t0,      t2
351*c0909341SAndroid Build Coastguard Worker    add.w           t7,      t7,      t1   // cnt += num_bits_read
352*c0909341SAndroid Build Coastguard Worker    st.d            t0,      a0,      0
353*c0909341SAndroid Build Coastguard Worker4:
354*c0909341SAndroid Build Coastguard Worker    or              t6,      t6,      t3   // dif |= next_bits
355*c0909341SAndroid Build Coastguard Worker9:
    // Common exit: write back cnt and dif, return the decoded bit in a0.
356*c0909341SAndroid Build Coastguard Worker    st.w            t7,      a0,      28   // store cnt
357*c0909341SAndroid Build Coastguard Worker    st.d            t6,      a0,      16   // store dif
358*c0909341SAndroid Build Coastguard Worker    move            a0,      t8
359*c0909341SAndroid Build Coastguard Workerendfunc
360*c0909341SAndroid Build Coastguard Worker
361*c0909341SAndroid Build Coastguard Workerfunction msac_decode_bool_adapt_lsx
362*c0909341SAndroid Build Coastguard Worker    ld.hu           a3,      a1,      0    // cdf[0] /f
363*c0909341SAndroid Build Coastguard Worker    ld.w            t0,      a0,      24   // rng
364*c0909341SAndroid Build Coastguard Worker    ld.d            t1,      a0,      16   // dif
365*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t0,      8    // r >> 8
366*c0909341SAndroid Build Coastguard Worker    srli.w          a7,      a3,      6
367*c0909341SAndroid Build Coastguard Worker    mul.w           t2,      t2,      a7
368*c0909341SAndroid Build Coastguard Worker    ld.w            a4,      a0,      32   // allow_update_cdf
369*c0909341SAndroid Build Coastguard Worker    ld.w            a5,      a0,      28   // cnt
370*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t2,      1
371*c0909341SAndroid Build Coastguard Worker    addi.w          t2,      t2,      4    // v
372*c0909341SAndroid Build Coastguard Worker    slli.d          t3,      t2,      48   // vw
373*c0909341SAndroid Build Coastguard Worker    sltu            t4,      t1,      t3
374*c0909341SAndroid Build Coastguard Worker    move            t8,      t4            // bit
375*c0909341SAndroid Build Coastguard Worker    xori            t4,      t4,      1
376*c0909341SAndroid Build Coastguard Worker    maskeqz         t6,      t3,      t4   // if (ret) vw
377*c0909341SAndroid Build Coastguard Worker    sub.d           t6,      t1,      t6   // dif
378*c0909341SAndroid Build Coastguard Worker    slli.w          t5,      t2,      1
379*c0909341SAndroid Build Coastguard Worker    sub.w           t5,      t0,      t5   // r - 2v
380*c0909341SAndroid Build Coastguard Worker    maskeqz         t7,      t5,      t4   // if (ret) r - 2v
381*c0909341SAndroid Build Coastguard Worker    add.w           t5,      t2,      t7   // v(rng)
382*c0909341SAndroid Build Coastguard Worker    beqz            a4,      .renorm
383*c0909341SAndroid Build Coastguard Worker
384*c0909341SAndroid Build Coastguard Worker    // update_cdf
385*c0909341SAndroid Build Coastguard Worker    ld.hu           t0,      a1,      2    // cdf[1]
386*c0909341SAndroid Build Coastguard Worker    srli.w          t1,      t0,      4
387*c0909341SAndroid Build Coastguard Worker    addi.w          t1,      t1,      4    // rate
388*c0909341SAndroid Build Coastguard Worker    sltui           t2,      t0,      32   // count < 32
389*c0909341SAndroid Build Coastguard Worker    add.w           t0,      t0,      t2   // count + (count < 32)
390*c0909341SAndroid Build Coastguard Worker    sub.w           a3,      a3,      t8   // cdf[0] -= bit
391*c0909341SAndroid Build Coastguard Worker    slli.w          t4,      t8,      15
392*c0909341SAndroid Build Coastguard Worker    sub.w           t7,      a3,      t4   // cdf[0] - bit - 32768
393*c0909341SAndroid Build Coastguard Worker    sra.w           t7,      t7,      t1   // (cdf[0] - bit - 32768) >> rate
394*c0909341SAndroid Build Coastguard Worker    sub.w           t7,      a3,      t7   // cdf[0]
395*c0909341SAndroid Build Coastguard Worker    st.h            t7,      a1,      0
396*c0909341SAndroid Build Coastguard Worker    st.h            t0,      a1,      2
397*c0909341SAndroid Build Coastguard Worker
398*c0909341SAndroid Build Coastguard Worker.renorm:
399*c0909341SAndroid Build Coastguard Worker    clz.w           t4,      t5            // d
400*c0909341SAndroid Build Coastguard Worker    xori            t4,      t4,      16   // d
401*c0909341SAndroid Build Coastguard Worker    sll.d           t6,      t6,      t4
402*c0909341SAndroid Build Coastguard Worker    sll.w           t5,      t5,      t4
403*c0909341SAndroid Build Coastguard Worker    sub.w           t7,      a5,      t4   // cnt-d
404*c0909341SAndroid Build Coastguard Worker    st.w            t5,      a0,      24   // store rng
405*c0909341SAndroid Build Coastguard Worker    bgeu            a5,      t4,      9f
406*c0909341SAndroid Build Coastguard Worker
407*c0909341SAndroid Build Coastguard Worker    // refill
408*c0909341SAndroid Build Coastguard Worker    ld.d            t0,      a0,      0    // buf_pos
409*c0909341SAndroid Build Coastguard Worker    ld.d            t1,      a0,      8    // buf_end
410*c0909341SAndroid Build Coastguard Worker    addi.d          t2,      t0,      8
411*c0909341SAndroid Build Coastguard Worker    bltu            t1,      t2,      2f
412*c0909341SAndroid Build Coastguard Worker
413*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t0,      0    // next_bits
414*c0909341SAndroid Build Coastguard Worker    addi.w          t1,      t7,      -48  // shift_bits = cnt + 16 (- 64)
415*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
416*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      zero,    t1
417*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3            // next_bits = bswap(next_bits)
418*c0909341SAndroid Build Coastguard Worker    srli.w          t2,      t2,      3    // num_bytes_read
419*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t1   // next_bits >>= (shift_bits & 63)
420*c0909341SAndroid Build Coastguard Worker    b               3f
421*c0909341SAndroid Build Coastguard Worker1:
422*c0909341SAndroid Build Coastguard Worker    addi.w          t3,      t7,      -48
423*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t3   // pad with ones
424*c0909341SAndroid Build Coastguard Worker    b               4f
425*c0909341SAndroid Build Coastguard Worker2:
426*c0909341SAndroid Build Coastguard Worker    bgeu            t0,      t1,      1b
427*c0909341SAndroid Build Coastguard Worker    ld.d            t3,      t1,      -8   // next_bits
428*c0909341SAndroid Build Coastguard Worker    sub.w           t2,      t2,      t1
429*c0909341SAndroid Build Coastguard Worker    sub.w           t1,      t1,      t0   // num_bytes_left
430*c0909341SAndroid Build Coastguard Worker    slli.w          t2,      t2,      3
431*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
432*c0909341SAndroid Build Coastguard Worker    addi.w          t2,      t7,      -48
433*c0909341SAndroid Build Coastguard Worker    nor             t3,      t3,      t3
434*c0909341SAndroid Build Coastguard Worker    sub.w           t4,      zero,    t2
435*c0909341SAndroid Build Coastguard Worker    revb.d          t3,      t3
436*c0909341SAndroid Build Coastguard Worker    srli.w          t4,      t4,      3
437*c0909341SAndroid Build Coastguard Worker    srl.d           t3,      t3,      t2
438*c0909341SAndroid Build Coastguard Worker    sltu            t2,      t1,      t4
439*c0909341SAndroid Build Coastguard Worker    maskeqz         t1,      t1,      t2
440*c0909341SAndroid Build Coastguard Worker    masknez         t2,      t4,      t2
441*c0909341SAndroid Build Coastguard Worker    or              t2,      t2,      t1   // num_bytes_read
442*c0909341SAndroid Build Coastguard Worker3:
443*c0909341SAndroid Build Coastguard Worker    slli.w          t1,      t2,      3
444*c0909341SAndroid Build Coastguard Worker    add.d           t0,      t0,      t2
445*c0909341SAndroid Build Coastguard Worker    add.w           t7,      t7,      t1   // cnt += num_bits_read
446*c0909341SAndroid Build Coastguard Worker    st.d            t0,      a0,      0
447*c0909341SAndroid Build Coastguard Worker4:
448*c0909341SAndroid Build Coastguard Worker    or              t6,      t6,      t3   // dif |= next_bits
449*c0909341SAndroid Build Coastguard Worker9:
450*c0909341SAndroid Build Coastguard Worker    st.w            t7,      a0,      28   // store cnt
451*c0909341SAndroid Build Coastguard Worker    st.d            t6,      a0,      16   // store dif
452*c0909341SAndroid Build Coastguard Worker    move            a0,      t8
453*c0909341SAndroid Build Coastguard Workerendfunc
454*c0909341SAndroid Build Coastguard Worker
/*
 * HI_TOK allow_update_cdf
 *
 * Body of msac_decode_hi_tok_lsx, expanded once with CDF adaptation
 * (allow_update_cdf == 1) and once without (allow_update_cdf == 0).
 * Each pass decodes one symbol val in 0..3 against the 4-entry cdf held in
 * vr0, renormalizes rng/dif, refills dif from the byte buffer when cnt runs
 * out, and repeats while val == 3, for at most 4 passes.
 *
 * On entry the caller must have set up the registers listed in the header
 * of msac_decode_hi_tok_lsx and must have reserved a 0x20-byte stack frame:
 *   sp + 0x00           : rng (16 bit), acts as "u" for val == 0
 *   sp + 0x02 .. 0x11   : the 8 halfwords of v computed this pass
 *   sp + 0x12 .. 0x1f   : padding, keeps sp 16-byte aligned
 * The macro releases that frame and leaves the token in a0; it does not
 * return by itself.
 *
 * Clobbers: a3, a4 (update variant only), t0-t7, vr0-vr3, vr7, vr9.
 */
.macro HI_TOK allow_update_cdf
.\allow_update_cdf\()_hi_tok_lsx_start:
.if \allow_update_cdf == 1
    ld.hu        a4,    a1,    0x06 // cdf[3], the counter that is bumped below
.endif
    vor.v        vr1,   vr0,   vr0
    vsrli.h      vr1,   vr1,   0x06 // cdf[val] >> EC_PROB_SHIFT
    vstelm.h     vr2,   sp,    0, 0 // frame[0x00] = rng
    vand.v       vr2,   vr2,   vr4  // (8 x rng) & 0xff00
    vslli.h      vr1,   vr1,   0x07
    vmuh.hu      vr1,   vr1,   vr2
    vadd.h       vr1,   vr1,   vr5  // v += EC_MIN_PROB (4) * (n_symbols (3) - val)
    vst          vr1,   sp,    0x02 // frame[0x02..0x11] = v[]
    vssub.hu     vr1,   vr1,   vr3  // saturating v - c: 0 where v <= c
    vseqi.h      vr1,   vr1,   0    // lane mask: 0xffff where v <= c, else 0
.if \allow_update_cdf == 1
    addi.d       t4,    a4,    0x50
    srli.d       t4,    t4,    0x04 // rate = (count + 0x50) >> 4
    sltui        t7,    a4,    32
    add.w        a4,    a4,    t7   // count += (count < 32)

    vreplgr2vr.h vr7,   t4          // broadcast rate
    vavgr.hu     vr9,   vr8,   vr1  // rounding average of 0xffff and the lane mask
    vsub.h       vr9,   vr9,   vr0
    vsub.h       vr0,   vr0,   vr1
    vsra.h       vr9,   vr9,   vr7
    vadd.h       vr0,   vr0,   vr9
    vstelm.d     vr0,   a1,    0,  0 // write back cdf[0..3]
    st.h         a4,    a1,    0x06  // then overwrite cdf[3] with the new count
.endif
    vmsknz.b     vr7,   vr1         // one mask bit per non-zero byte of vr1
    movfr2gr.s   t4,    f7
    ctz.w        t4,    t4          // 2 * val (two mask bits per halfword lane)
    addi.d       t7,    t4,    2
    ldx.hu       t6,    sp,    t4   // u = frame[2 * val]
    ldx.hu       t5,    sp,    t7   // v = frame[2 * val + 2]
    addi.w       t3,    t3,    0x05
    addi.w       t4,    t4,   -0x05 // 2 * val - 5: positive only when val == 3
    sub.w        t6,    t6,    t5   // u - v , rng for ctx_norm
    slli.d       t5,    t5,    0x30 // (ec_win)v << (EC_WIN_SIZE - 16)
    sub.d        t1,    t1,    t5   // s->dif - ((ec_win)v << (EC_WIN_SIZE - 16))
    // Init ctx_norm param
    clz.w        t7,    t6
    xori         t7,    t7,    0x1f
    xori         t7,    t7,    0x0f // d = 15 ^ (31 ^ clz(rng));
    sll.d        t1,    t1,    t7   // dif << d
    sll.d        t6,    t6,    t7   // rng << d
    // update vr2 8 x rng
    vreplgr2vr.h vr2,   t6
    vreplvei.h   vr2,   vr2,   0
    st.w         t6,    a0,    0x18 // store rng
    move         t0,    t2          // keep the old cnt for the compare below
    sub.w        t2,    t2,    t7   // cnt - d
    bgeu         t0,    t7,    .\allow_update_cdf\()_hi_tok_lsx_ctx_norm_end     // if ((unsigned)cnt >= (unsigned)d) no refill is needed
    // Step into ctx_fill
    ld.d         t5,    a0,    0x00 // buf_pos
    ld.d         t6,    a0,    0x08 // end_pos
    addi.d       t7,    t5,    0x08 // buf_pos + 8
    sub.d        t7,    t7,    t6   // (buf_pos + 8) - end_pos
    blt          zero,  t7,    .\allow_update_cdf\()_hi_tok_lsx_ctx_refill_eob
    // (end_pos - buf_pos) >= 8
    ld.d         t6,    t5,    0x00 // load buf_pos[0]~buf_pos[7]
    addi.w       t7,    t2,   -0x30 // cnt - 0x30
    nor          t6,    t6,    t6   // not buf data
    revb.d       t6,    t6          // Byte reversal
    srl.d        t6,    t6,    t7   // Replace left shift with right shift
    sub.w        t7,    zero,  t7   // neg
    srli.w       t7,    t7,    0x03 // number of bytes consumed
    or           t1,    t1,    t6   // dif |= (ec_win)(*buf_pos++ ^ 0xff) << c
    b            .\allow_update_cdf\()_hi_tok_lsx_ctx_refill_end
.\allow_update_cdf\()_hi_tok_lsx_ctx_refill_eob:
    bge          t5,    t6,    .\allow_update_cdf\()_hi_tok_lsx_ctx_refill_one
    // end_pos - buf_pos < 8 && buf_pos < end_pos
    ld.d         t0,    t6,   -0x08 // the 8 bytes that end exactly at end_pos
    slli.d       t7,    t7,    0x03 // ((buf_pos + 8) - end_pos) * 8 bits
    srl.d        t6,    t0,    t7   // Retrieve the buf data and remove the excess data
    addi.w       t7,    t2,   -0x30 // cnt - 0x30
    nor          t6,    t6,    t6   // not
    revb.d       t6,    t6          // Byte reversal
    srl.d        t6,    t6,    t7   // Replace left shift with right shift
    sub.w        t7,    zero,  t7   // neg
    or           t1,    t1,    t6   // dif |= (ec_win)(*buf_pos++ ^ 0xff) << c
    ld.d         t6,    a0,    0x08 // end_pos
    srli.w       t7,    t7,    0x03 // number of bytes wanted
    sub.d        t6,    t6,    t5   // end_pos - buf_pos
    slt          t0,    t6,    t7
    maskeqz      a3,    t6,    t0   // min(bytes wanted, end_pos - buf_pos)
    masknez      t0,    t7,    t0
    or           t7,    a3,    t0
    b            .\allow_update_cdf\()_hi_tok_lsx_ctx_refill_end
.\allow_update_cdf\()_hi_tok_lsx_ctx_refill_one:
    // buf_pos >= end_pos
    addi.w       t7,    t2,   -0x10
    andi         t7,    t7,    0xf
    nor          t0,    zero,  zero
    srl.d        t0,    t0,    t7
    or           t1,    t1,    t0 // dif |= ~(~(ec_win)0xff << c);
    b            .\allow_update_cdf\()_hi_tok_lsx_ctx_norm_end
.\allow_update_cdf\()_hi_tok_lsx_ctx_refill_end:
    add.d        t5,    t5,    t7        // buf_pos + bytes consumed
    st.d         t5,    a0,    0x00      // Store buf_pos
    alsl.w       t2,    t7,    t2,  0x03 // cnt += 8 * bytes consumed
.\allow_update_cdf\()_hi_tok_lsx_ctx_norm_end:
    srli.d       t7,    t1,    0x30
    vreplgr2vr.h vr3,   t7        // broadcast the high 16 bits of dif
    add.w        t3,    t4,    t3 // net effect per pass: t3 += 2 * val
    beqz         t3,    .\allow_update_cdf\()_hi_tok_lsx_end // t3 reaches 0 after four passes with val == 3
    blt          zero,  t4,    .\allow_update_cdf\()_hi_tok_lsx_start // tok_br == 3
.\allow_update_cdf\()_hi_tok_lsx_end:
    addi.d       t3,    t3,    0x1e
    st.d         t1,    a0,    0x10 // store dif
    st.w         t2,    a0,    0x1c // store cnt
    srli.w       a0,    t3,    0x01 // tok
    addi.d       sp,    sp,    0x20 // release the frame reserved by the caller of this macro
.endm

/**
 * @param unsigned dav1d_msac_decode_hi_tok_c(MsacContext *const s, uint16_t *const cdf)
 * * a0: s, a1: cdf; the decoded token is returned in a0.
 * * Fields of s accessed here, by byte offset: 0x00 buf_pos, 0x08 end_pos,
 * * 0x10 dif, 0x18 rng, 0x1c cnt, 0x20 allow_update_cdf.
 * * A 0x20-byte stack frame is reserved (only 0x12 bytes are used) so that
 * * sp keeps the 16-byte alignment required by the LoongArch psABI.
 * * Reg Allocation
 * * vr0: cdf;
 * * vr1: temp;
 * * vr2: rng;
 * * vr3: dif;
 * * vr4: const 0xff00ff00...ff00ff00;
 * * vr5: const 0x0004080c;
 * * vr7: temp (broadcast rate, lane bit mask);
 * * vr8: const all ones;
 * * vr9: temp (cdf update);
 * * a3: temp;
 * * a4: cdf[3] counter (update variant only);
 * * t0: allow_update_cdf, tmp;
 * * t1: dif;
 * * t2: cnt;
 * * t3: -24 (0xffffffe8), outermost control parameter;
 * * t4: loop time
 * * t5: v, buf_pos, temp;
 * * t6: u, rng, end_pos, buf, temp;
 * * t7: temp;
 */
function msac_decode_hi_tok_lsx
    fld.d     f0,    a1,   0    // Load cdf[0]~cdf[3]
    vldrepl.h vr2,   a0,   0x18 //  8 x rng, assert(rng <= 65535U), only the lower 16 bits are valid
    vldrepl.h vr3,   a0,   0x16 // broadcast the high 16 bits of dif, c = s->dif >> (EC_WIN_SIZE - 16)
    ld.w      t0,    a0,   0x20 // allow_update_cdf
    la.local  t7,    ph_0xff00
    vld       vr4,   t7,   0x00 // 0xff00ff00...ff00ff00
    la.local  t7,    min_prob
    vld       vr5,   t7,   12 * 2 // 0x0004080c
    ld.d      t1,    a0,   0x10   // dif
    ld.w      t2,    a0,   0x1c   // cnt
    addi.w    t3,    zero, -0x18  // -24 (0xffffffe8): loop control, see HI_TOK
    vseq.h    vr8,   vr8,  vr8    // all ones
    addi.d    sp,    sp,  -0x20   // alloc stack, multiple of 16; released inside HI_TOK
    beqz      t0,    .hi_tok_lsx_no_update_cdf
    HI_TOK 1
    jirl      zero,  ra,   0x0
.hi_tok_lsx_no_update_cdf:
    HI_TOK 0
endfunc
613