xref: /aosp_15_r20/external/libdav1d/src/loongarch/refmvs.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/*
 * Copyright © 2023, VideoLAN and dav1d authors
 * Copyright © 2023, Loongson Technology Corporation Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/loongarch/loongson_asm.S"
29*c0909341SAndroid Build Coastguard Worker
/*
 * static void splat_mv_c(refmvs_block **rr, const refmvs_block *const rmv,
 *                        const int bx4, const int bw4, int bh4)
 *
 * LSX version of splat_mv: for each of bh4 row pointers read from rr, the
 * 12-byte block at rmv is replicated bw4 times, starting at byte offset
 * bx4 * 12 inside that row.
 *
 * Register use (arguments in the order of the prototype above):
 *   a0  rr; one pointer is loaded per row and a0 is advanced by 8
 *   a1  rmv; 16 bytes are loaded, only the first 12 are replicated
 *   a2  bx4; converted in place to the byte offset bx4 * 12
 *   a3  bw4; selects the store loop through clz.w(bw4) - 26
 *   a4  bh4; row counter, decremented once per row
 *   t3  destination address inside the current row
 *   vr1/vr2/vr3  the 12-byte pattern rotated by 0/4/8 bytes, so storing the
 *                three vectors back to back writes four copies (48 bytes)
 *
 * NOTE(review): the jump-table index is not range-checked; this assumes bw4
 * is one of 1, 2, 4, 8, 16, 32 - confirm against the callers.
 * NOTE(review): every loop tests a4 after its stores, so one row is always
 * written; presumably callers guarantee bh4 >= 1.
 */

function splat_mv_lsx
    // Scalar work (table lookup, bx4 * 12) is interleaved with the vector
    // set-up; the byte indices in the trailing comments refer to rmv bytes.
    vld           vr0,      a1,       0          // 0 1 ... 11 ...
    clz.w         t4,       a3
    vaddi.bu      vr1,      vr0,      0
    addi.w        t4,       t4,       -26
    vextrins.w    vr1,      vr0,      0x30       // 0 1 2 ... 11 0 1 2 3
    la.local      t5,       .SPLAT_LSX_JRTABLE
    vbsrl.v       vr2,      vr1,      4          // 4 5 6 7...11 0 1 2 3 0 0 0 0
    alsl.d        t6,       t4,       t5,     1
    vextrins.w    vr2,      vr0,      0x31       // 4 5 6 7...11 0 1 2 3 4 5 6 7
    ld.h          t7,       t6,       0
    vbsrl.v       vr3,      vr2,      4          // 8 9 10 11 0 1 2 3 4 5 6 7 0 0 0 0
    add.d         t8,       t5,       t7
    alsl.d        a2,       a2,       a2,     1
    vextrins.w    vr3,      vr0,      0x32       // 8 9 10 11 0 1 2 3 4 5 6 7 8 9 10 11
    slli.w        a2,       a2,       2
    jirl          $r0,      t8,       0

// 16-bit offsets relative to the table itself. Entries run from the widest
// to the narrowest case because clz.w grows as bw4 shrinks
// (bw4 = 32 -> index 0, bw4 = 1 -> index 5).
.SPLAT_LSX_JRTABLE:
    .hword .SPLAT_W32_LSX - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W16_LSX - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W8_LSX  - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W4_LSX  - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W2_LSX  - .SPLAT_LSX_JRTABLE
    .hword .SPLAT_W1_LSX  - .SPLAT_LSX_JRTABLE

// bw4 == 1: 12 bytes per row, written as 8 + 4 bytes through f1/f3, which
// are used here as the low parts of vr1/vr3.
.SPLAT_W1_LSX:
    ld.d          t3,       a0,       0
    addi.d        a0,       a0,       8
    addi.d        a4,       a4,       -1
    add.d         t3,       t3,       a2

    fst.d         f1,       t3,       0
    fst.s         f3,       t3,       8
    blt           zero,     a4,       .SPLAT_W1_LSX
    b             .splat_end
// bw4 == 2: 24 bytes per row (16 from vr1, 8 from the low part of vr2).
.SPLAT_W2_LSX:
    ld.d          t3,       a0,       0
    addi.d        a0,       a0,       8
    addi.d        a4,       a4,       -1
    add.d         t3,       t3,       a2

    vst           vr1,      t3,       0
    fst.d         f2,       t3,       16
    blt           zero,     a4,       .SPLAT_W2_LSX
    b             .splat_end

// bw4 == 4: 48 bytes per row.
.SPLAT_W4_LSX:
    ld.d          t3,       a0,       0
    addi.d        a0,       a0,       8
    addi.d        a4,       a4,       -1
    add.d         t3,       t3,       a2

    vst           vr1,      t3,       0
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32
    blt           zero,     a4,       .SPLAT_W4_LSX
    b             .splat_end

// bw4 == 8: 96 bytes per row.
.SPLAT_W8_LSX:
    ld.d          t3,       a0,       0
    addi.d        a0,       a0,       8
    addi.d        a4,       a4,       -1
    add.d         t3,       t3,       a2

    vst           vr1,      t3,       0
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32

    vst           vr1,      t3,       48
    vst           vr2,      t3,       64
    vst           vr3,      t3,       80
    blt           zero,     a4,       .SPLAT_W8_LSX
    b             .splat_end

// bw4 == 16: 192 bytes per row, emitted as two 96-byte groups; t3 is
// advanced by 96 after each group.
.SPLAT_W16_LSX:
    ld.d          t3,       a0,       0
    addi.d        a0,       a0,       8
    addi.d        a4,       a4,       -1
    add.d         t3,       t3,       a2

.rept 2
    vst           vr1,      t3,       0
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32

    vst           vr1,      t3,       48
    vst           vr2,      t3,       64
    vst           vr3,      t3,       80

    addi.d        t3,       t3,       96
.endr

    blt           zero,     a4,       .SPLAT_W16_LSX
    b             .splat_end

// bw4 == 32: 384 bytes per row, emitted as four 96-byte groups. This is the
// last case, so it falls through to .splat_end instead of branching.
.SPLAT_W32_LSX:
    ld.d          t3,       a0,       0
    addi.d        a0,       a0,       8
    addi.d        a4,       a4,       -1
    add.d         t3,       t3,       a2

.rept 4
    vst           vr1,      t3,       0
    vst           vr2,      t3,       16
    vst           vr3,      t3,       32

    vst           vr1,      t3,       48
    vst           vr2,      t3,       64
    vst           vr3,      t3,       80

    addi.d        t3,       t3,       96
.endr

    blt           zero,     a4,       .SPLAT_W32_LSX

// NOTE(review): no explicit return instruction follows; presumably the
// endfunc macro from loongson_asm.S emits it - confirm there.
.splat_end:
endfunc
153*c0909341SAndroid Build Coastguard Worker
// Table of 32 16-bit multipliers: entry 0 is 0 and entry i (1 <= i <= 31)
// is 16384 / i rounded down. load_tmvs_lsx reads the 64 bytes as four
// 16-byte vectors.
// NOTE(review): const/endconst are macros from loongson_asm.S; section and
// alignment are decided there.
const la_div_mult
.short    0, 16384, 8192, 5461, 4096, 3276, 2730, 2340
.short 2048,  1820, 1638, 1489, 1365, 1260, 1170, 1092
.short 1024,   963,  910,  862,  819,  780,  744,  712
.short  682,   655,  630,  606,  585,  564,  546,  528
endconst
160*c0909341SAndroid Build Coastguard Worker
/*
 * LOAD_SET_LOOP is_odd
 *
 * Fills a rectangle of 5-byte entries: for every row, the 32-bit value in t7
 * is stored at the start of each entry (byte stride 5); the fifth byte of
 * each entry is left untouched. The stores are therefore not word aligned.
 *
 * Inputs:
 *   t3      address of the first entry of the first row; advanced by t2
 *           after every row
 *   t2      distance between rows in bytes
 *   t6      number of entries per row
 *   t5      current row index; incremented once per row
 *   a5      row limit; the macro loops while t5 < a5
 *   t7      32-bit value to store
 *   is_odd  assemble-time flag; when non-zero one leading store is emitted
 *           so the two-entries-per-iteration loop handles the even remainder
 *
 *  temp reg: a6 a7
 *   a6      t6 * 5, the row width in bytes
 *   a7      byte offset of the current entry inside the row
 *
 * NOTE(review): both loops are bottom-tested. With is_odd = 0 at least two
 * entries are written per row, and at least one row is always processed;
 * presumably callers guarantee a non-empty column and row range.
 */
.macro LOAD_SET_LOOP is_odd
    slli.d          a6,      t6,     2
    add.d           a6,      a6,     t6  // col_w * 5
0:
    addi.d          a7,      zero,   0   // x
.if \is_odd
    // Odd width: store one entry up front; a width of 1 is finished here.
    stx.w           t7,      t3,     a7
    addi.d          a7,      a7,     5
    bge             a7,      a6,     2f
.endif

1:
    // Two entries per iteration.
    stx.w           t7,      t3,     a7
    addi.d          a7,      a7,     5
    stx.w           t7,      t3,     a7
    addi.d          a7,      a7,     5
    blt             a7,      a6,     1b
2:
    // Next row.
    add.d           t3,      t3,     t2
    addi.d          t5,      t5,     1
    blt             t5,      a5,     0b
.endm
186*c0909341SAndroid Build Coastguard Worker
/*
 * static void load_tmvs_c(const refmvs_frame *const rf, int tile_row_idx,
 *                         const int col_start8, const int col_end8,
 *                         const int row_start8, int row_end8)
 */
192*c0909341SAndroid Build Coastguard Workerfunction load_tmvs_lsx
193*c0909341SAndroid Build Coastguard Worker    addi.d         sp,      sp,       -80
194*c0909341SAndroid Build Coastguard Worker    st.d           s0,      sp,       0
195*c0909341SAndroid Build Coastguard Worker    st.d           s1,      sp,       8
196*c0909341SAndroid Build Coastguard Worker    st.d           s2,      sp,       16
197*c0909341SAndroid Build Coastguard Worker    st.d           s3,      sp,       24
198*c0909341SAndroid Build Coastguard Worker    st.d           s4,      sp,       32
199*c0909341SAndroid Build Coastguard Worker    st.d           s5,      sp,       40
200*c0909341SAndroid Build Coastguard Worker    st.d           s6,      sp,       48
201*c0909341SAndroid Build Coastguard Worker    st.d           s7,      sp,       56
202*c0909341SAndroid Build Coastguard Worker    st.d           s8,      sp,       64
203*c0909341SAndroid Build Coastguard Worker
204*c0909341SAndroid Build Coastguard Worker    vld           vr16,     a0,       16
205*c0909341SAndroid Build Coastguard Worker    vld           vr0,      a0,       52    // rf->mfmv_ref
206*c0909341SAndroid Build Coastguard Worker    ld.w          s8,       a0,       152   // [0] - rf->n_mfmvs
207*c0909341SAndroid Build Coastguard Worker    vld           vr17,     a0,       168   // [0] - rp_ref| [1]- rp_proj
208*c0909341SAndroid Build Coastguard Worker    ld.d          t1,       a0,       184   // stride
209*c0909341SAndroid Build Coastguard Worker    ld.w          t0,       a0,       200
210*c0909341SAndroid Build Coastguard Worker    addi.w        t0,       t0,       -1
211*c0909341SAndroid Build Coastguard Worker    bnez          t0,       1f
212*c0909341SAndroid Build Coastguard Worker    addi.w        a1,       zero,     0
213*c0909341SAndroid Build Coastguard Worker1:
214*c0909341SAndroid Build Coastguard Worker    addi.d        t0,       a3,       8
215*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr1,      t0,       0
216*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr1,      a5,       1
217*c0909341SAndroid Build Coastguard Worker    vmin.w        vr1,      vr1,      vr16  // [0] col_end8i [1] row_end8
218*c0909341SAndroid Build Coastguard Worker    addi.d        t0,       a2,       -8
219*c0909341SAndroid Build Coastguard Worker    bge           t0,       zero,     2f
220*c0909341SAndroid Build Coastguard Worker    addi.w        t0,       zero,     0     // t0 col_start8i
221*c0909341SAndroid Build Coastguard Worker2:
222*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d  t4,       vr17,     1     // rf->rp_proj
223*c0909341SAndroid Build Coastguard Worker    slli.d        t2,       t1,       2
224*c0909341SAndroid Build Coastguard Worker    add.d         t2,       t2,       t1    // stride * 5
225*c0909341SAndroid Build Coastguard Worker    slli.d        a1,       a1,       4     // tile_row_idx * 16
226*c0909341SAndroid Build Coastguard Worker    andi          t3,       a4,       0xf
227*c0909341SAndroid Build Coastguard Worker    add.d         t3,       t3,       a1    // tile_row_idx * 16 + row_start8 & 15
228*c0909341SAndroid Build Coastguard Worker    mul.w         t3,       t3,       t2
229*c0909341SAndroid Build Coastguard Worker    mul.w         t8,       a1,       t2
230*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  a5,       vr1,      1
231*c0909341SAndroid Build Coastguard Worker    addi.d        t5,       a4,       0
232*c0909341SAndroid Build Coastguard Worker    sub.d         t6,       a3,       a2     // col_end8 - col_start8
233*c0909341SAndroid Build Coastguard Worker    li.w          t7,       0x80008000
234*c0909341SAndroid Build Coastguard Worker    slli.d        a7,       a2,       2
235*c0909341SAndroid Build Coastguard Worker    add.d         t3,       t3,       a2
236*c0909341SAndroid Build Coastguard Worker    add.d         t3,       t3,       a7
237*c0909341SAndroid Build Coastguard Worker    add.d         t3,       t3,       t4     // rp_proj
238*c0909341SAndroid Build Coastguard Worker    andi          a6,       t6,       1
239*c0909341SAndroid Build Coastguard Worker    bnez          a6,       3f
240*c0909341SAndroid Build Coastguard Worker    LOAD_SET_LOOP 0
241*c0909341SAndroid Build Coastguard Worker    b             4f
242*c0909341SAndroid Build Coastguard Worker3:
243*c0909341SAndroid Build Coastguard Worker    LOAD_SET_LOOP 1
244*c0909341SAndroid Build Coastguard Worker4:
245*c0909341SAndroid Build Coastguard Worker    addi.d        a6,       zero,     0      // n
246*c0909341SAndroid Build Coastguard Worker    bge           a6,       s8,       .end_load
247*c0909341SAndroid Build Coastguard Worker    add.d         t3,       t8,       t4     // rp_proj
248*c0909341SAndroid Build Coastguard Worker    mul.w         t6,       a4,       t2
249*c0909341SAndroid Build Coastguard Worker    addi.d        s7,       zero,     40
250*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t1,       vr1,      0      // col_end8i
251*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr2,      vr0,      4      // rf->mfmv_ref2cur
252*c0909341SAndroid Build Coastguard Worker    addi.d        t5,       a0,       64     // rf->mfmv_ref2ref
253*c0909341SAndroid Build Coastguard Worker    la.local      t8,       la_div_mult
254*c0909341SAndroid Build Coastguard Worker    vld           vr6,      t8,       0
255*c0909341SAndroid Build Coastguard Worker    vld           vr7,      t8,       16
256*c0909341SAndroid Build Coastguard Worker    vld           vr8,      t8,       32
257*c0909341SAndroid Build Coastguard Worker    vld           vr9,      t8,       48
258*c0909341SAndroid Build Coastguard Worker    li.w          t8,       0x3fff
259*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.h  vr21,     t8
260*c0909341SAndroid Build Coastguard Worker    vxor.v        vr18,     vr18,     vr18   // zero
261*c0909341SAndroid Build Coastguard Worker    vsub.h        vr20,     vr18,     vr21
262*c0909341SAndroid Build Coastguard Worker    vpickev.b     vr12,     vr7,      vr6
263*c0909341SAndroid Build Coastguard Worker    vpickod.b     vr13,     vr7,      vr6
264*c0909341SAndroid Build Coastguard Worker    vpickev.b     vr14,     vr9,      vr8
265*c0909341SAndroid Build Coastguard Worker    vpickod.b     vr15,     vr9,      vr8
266*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d  s6,       vr17,     0       // rf->rp_ref
267*c0909341SAndroid Build Coastguard Worker5:
268*c0909341SAndroid Build Coastguard Worker    vld           vr10,     t5,       0
269*c0909341SAndroid Build Coastguard Worker    vld           vr11,     t5,       16
270*c0909341SAndroid Build Coastguard Worker    vpickev.h     vr10,     vr11,     vr10
271*c0909341SAndroid Build Coastguard Worker    vpickev.b     vr10,     vr11,     vr10    // [1...7]
272*c0909341SAndroid Build Coastguard Worker
273*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr0,      vr0,      1
274*c0909341SAndroid Build Coastguard Worker    vpickve2gr.wu t8,       vr2,      0       // ref2cur
275*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr2,      vr2,      4
276*c0909341SAndroid Build Coastguard Worker    srli.d        t4,       t8,       24
277*c0909341SAndroid Build Coastguard Worker    xori          t4,       t4,       0x80
278*c0909341SAndroid Build Coastguard Worker    beqz          t4,       8f
279*c0909341SAndroid Build Coastguard Worker
280*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.h  vr23,     t8
281*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr6,      vr14,     vr12,    vr10
282*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr7,      vr15,     vr13,    vr10
283*c0909341SAndroid Build Coastguard Worker    vilvl.b       vr8,      vr7,      vr6
284*c0909341SAndroid Build Coastguard Worker    vmulwev.w.h   vr6,      vr8,      vr23
285*c0909341SAndroid Build Coastguard Worker    vmulwod.w.h   vr7,      vr8,      vr23
286*c0909341SAndroid Build Coastguard Worker
287*c0909341SAndroid Build Coastguard Worker    vpickve2gr.b  s0,       vr0,      0       // ref
288*c0909341SAndroid Build Coastguard Worker    slli.d        t8,       s0,       3
289*c0909341SAndroid Build Coastguard Worker    ldx.d         s1,       s6,       t8      // rf->rp_ref[ref]
290*c0909341SAndroid Build Coastguard Worker    addi.d        s0,       s0,       -4      // ref_sign
291*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.h  vr19,     s0
292*c0909341SAndroid Build Coastguard Worker    add.d         s1,       s1,       t6      // &rf->rp_ref[ref][row_start8 * stride]
293*c0909341SAndroid Build Coastguard Worker    addi.d        s2,       a4,       0       // y
294*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr8,      vr7,      vr6
295*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr9,      vr7,      vr6
296*c0909341SAndroid Build Coastguard Worker6:                                            // for (int y = row_start8;
297*c0909341SAndroid Build Coastguard Worker    andi          s3,       s2,       0xff8
298*c0909341SAndroid Build Coastguard Worker
299*c0909341SAndroid Build Coastguard Worker    addi.d        s4,       s3,       8
300*c0909341SAndroid Build Coastguard Worker    blt           a4,       s3,       0f
301*c0909341SAndroid Build Coastguard Worker    addi.d        s3,       a4,       0        // y_proj_start
302*c0909341SAndroid Build Coastguard Worker0:
303*c0909341SAndroid Build Coastguard Worker    blt           s4,       a5,       0f
304*c0909341SAndroid Build Coastguard Worker    addi.d        s4,       a5,       0        // y_proj_end
305*c0909341SAndroid Build Coastguard Worker0:
306*c0909341SAndroid Build Coastguard Worker    addi.d        s5,       t0,       0        // x
307*c0909341SAndroid Build Coastguard Worker7:                                             // for (int x = col_start8i;
308*c0909341SAndroid Build Coastguard Worker    slli.d        a7,       s5,       2
309*c0909341SAndroid Build Coastguard Worker    add.d         a7,       a7,       s5
310*c0909341SAndroid Build Coastguard Worker    add.d         a7,       s1,       a7      // rb
311*c0909341SAndroid Build Coastguard Worker    vld           vr3,      a7,       0       // [rb]
312*c0909341SAndroid Build Coastguard Worker    vpickve2gr.b  t4,       vr3,      4       // b_ref
313*c0909341SAndroid Build Coastguard Worker    beqz          t4,       .end_x
314*c0909341SAndroid Build Coastguard Worker    vreplve.b     vr11,     vr10,     t4
315*c0909341SAndroid Build Coastguard Worker    vpickve2gr.b  t7,       vr11,     4       // ref2ref
316*c0909341SAndroid Build Coastguard Worker    beqz          t7,       .end_x
317*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr4,      vr3,      0
318*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr6,      t4
319*c0909341SAndroid Build Coastguard Worker    vshuf.w       vr6,      vr9,      vr8      // frac
320*c0909341SAndroid Build Coastguard Worker    vmul.w        vr5,      vr6,      vr4
321*c0909341SAndroid Build Coastguard Worker    vsrai.w       vr4,      vr5,      31
322*c0909341SAndroid Build Coastguard Worker    vadd.w        vr4,      vr4,      vr5
323*c0909341SAndroid Build Coastguard Worker    vssrarni.h.w  vr4,      vr4,      14
324*c0909341SAndroid Build Coastguard Worker    vclip.h       vr4,      vr4,      vr20,    vr21  // offset
325*c0909341SAndroid Build Coastguard Worker    vxor.v        vr5,      vr4,      vr19    // offset.x ^ ref_sign
326*c0909341SAndroid Build Coastguard Worker    vori.b        vr5,      vr5,      0x1     // offset.x ^ ref_sign
327*c0909341SAndroid Build Coastguard Worker    vabsd.h       vr4,      vr4,      vr18
328*c0909341SAndroid Build Coastguard Worker    vsrli.h       vr4,      vr4,      6       // abs(offset.x) >> 6
329*c0909341SAndroid Build Coastguard Worker    vsigncov.h    vr4,      vr5,      vr4     // apply_sign
330*c0909341SAndroid Build Coastguard Worker    vpickve2gr.h  s0,       vr4,      0
331*c0909341SAndroid Build Coastguard Worker    add.d         s0,       s2,       s0      // pos_y
332*c0909341SAndroid Build Coastguard Worker    blt           s0,       s3,       .n_posy
333*c0909341SAndroid Build Coastguard Worker    bge           s0,       s4,       .n_posy
334*c0909341SAndroid Build Coastguard Worker    andi          s0,       s0,       0xf
335*c0909341SAndroid Build Coastguard Worker    mul.w         s0,       s0,       t2      // pos
336*c0909341SAndroid Build Coastguard Worker    vpickve2gr.h  t7,       vr4,      1
337*c0909341SAndroid Build Coastguard Worker    add.d         t7,       t7,       s5      // pos_x
338*c0909341SAndroid Build Coastguard Worker    add.d         s0,       t3,       s0      // rp_proj + pos
339*c0909341SAndroid Build Coastguard Worker
340*c0909341SAndroid Build Coastguard Worker.loop_posx:
341*c0909341SAndroid Build Coastguard Worker    andi          t4,       s5,       0xff8 // x_sb_align
342*c0909341SAndroid Build Coastguard Worker
343*c0909341SAndroid Build Coastguard Worker    blt           t7,       a2,       .n_posx
344*c0909341SAndroid Build Coastguard Worker    addi.d        t8,       t4,       -8
345*c0909341SAndroid Build Coastguard Worker    blt           t7,       t8,       .n_posx
346*c0909341SAndroid Build Coastguard Worker
347*c0909341SAndroid Build Coastguard Worker    bge           t7,       a3,       .n_posx
348*c0909341SAndroid Build Coastguard Worker    addi.d        t4,       t4,       16
349*c0909341SAndroid Build Coastguard Worker    bge           t7,       t4,       .n_posx
350*c0909341SAndroid Build Coastguard Worker
351*c0909341SAndroid Build Coastguard Worker    slli.d        t4,       t7,       2
352*c0909341SAndroid Build Coastguard Worker    add.d         t4,       t4,       t7      // pos_x * 5
353*c0909341SAndroid Build Coastguard Worker    add.d         t4,       s0,       t4      // rp_proj[pos + pos_x]
354*c0909341SAndroid Build Coastguard Worker    vstelm.w      vr3,      t4,       0,   0
355*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr11,     t4,       4,   4
356*c0909341SAndroid Build Coastguard Worker
357*c0909341SAndroid Build Coastguard Worker.n_posx:
358*c0909341SAndroid Build Coastguard Worker    addi.d        s5,       s5,       1       // x + 1
359*c0909341SAndroid Build Coastguard Worker    bge           s5,       t1,       .ret_posx
360*c0909341SAndroid Build Coastguard Worker    addi.d        a7,       a7,       5       // rb + 1
361*c0909341SAndroid Build Coastguard Worker    vld           vr4,      a7,       0       // [rb]
362*c0909341SAndroid Build Coastguard Worker    vseq.b        vr5,      vr4,      vr3
363*c0909341SAndroid Build Coastguard Worker
364*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d  t8,       vr5,      0
365*c0909341SAndroid Build Coastguard Worker    cto.d         t8,       t8
366*c0909341SAndroid Build Coastguard Worker    blt           t8,       s7,       7b
367*c0909341SAndroid Build Coastguard Worker
368*c0909341SAndroid Build Coastguard Worker    addi.d        t7,       t7,       1       // pos_x + 1
369*c0909341SAndroid Build Coastguard Worker
370*c0909341SAndroid Build Coastguard Worker    /*  Core computing loop expansion(sencond)  */
371*c0909341SAndroid Build Coastguard Worker    andi          t4,       s5,       0xff8 // x_sb_align
372*c0909341SAndroid Build Coastguard Worker
373*c0909341SAndroid Build Coastguard Worker    blt           t7,       a2,       .n_posx
374*c0909341SAndroid Build Coastguard Worker    addi.d        t8,       t4,       -8
375*c0909341SAndroid Build Coastguard Worker    blt           t7,       t8,       .n_posx
376*c0909341SAndroid Build Coastguard Worker
377*c0909341SAndroid Build Coastguard Worker    bge           t7,       a3,       .n_posx
378*c0909341SAndroid Build Coastguard Worker    addi.d        t4,       t4,       16
379*c0909341SAndroid Build Coastguard Worker    bge           t7,       t4,       .n_posx
380*c0909341SAndroid Build Coastguard Worker
381*c0909341SAndroid Build Coastguard Worker    slli.d        t4,       t7,       2
382*c0909341SAndroid Build Coastguard Worker    add.d         t4,       t4,       t7      // pos_x * 5
383*c0909341SAndroid Build Coastguard Worker    add.d         t4,       s0,       t4      // rp_proj[pos + pos_x]
384*c0909341SAndroid Build Coastguard Worker    vstelm.w      vr3,      t4,       0,   0
385*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr11,     t4,       4,   4
386*c0909341SAndroid Build Coastguard Worker
387*c0909341SAndroid Build Coastguard Worker    addi.d        s5,       s5,       1       // x + 1
388*c0909341SAndroid Build Coastguard Worker    bge           s5,       t1,       .ret_posx
389*c0909341SAndroid Build Coastguard Worker    addi.d        a7,       a7,       5       // rb + 1
390*c0909341SAndroid Build Coastguard Worker    vld           vr4,      a7,       0       // [rb]
391*c0909341SAndroid Build Coastguard Worker    vseq.b        vr5,      vr4,      vr3
392*c0909341SAndroid Build Coastguard Worker
393*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d  t8,       vr5,      0
394*c0909341SAndroid Build Coastguard Worker    cto.d         t8,       t8
395*c0909341SAndroid Build Coastguard Worker    blt           t8,       s7,       7b
396*c0909341SAndroid Build Coastguard Worker
397*c0909341SAndroid Build Coastguard Worker    addi.d        t7,       t7,       1       // pos_x + 1
398*c0909341SAndroid Build Coastguard Worker
399*c0909341SAndroid Build Coastguard Worker    /*  Core computing loop expansion(third)  */
400*c0909341SAndroid Build Coastguard Worker    andi          t4,       s5,       0xff8 // x_sb_align
401*c0909341SAndroid Build Coastguard Worker
402*c0909341SAndroid Build Coastguard Worker    blt           t7,       a2,       .n_posx
403*c0909341SAndroid Build Coastguard Worker    addi.d        t8,       t4,       -8
404*c0909341SAndroid Build Coastguard Worker    blt           t7,       t8,       .n_posx
405*c0909341SAndroid Build Coastguard Worker
406*c0909341SAndroid Build Coastguard Worker    bge           t7,       a3,       .n_posx
407*c0909341SAndroid Build Coastguard Worker    addi.d        t4,       t4,       16
408*c0909341SAndroid Build Coastguard Worker    bge           t7,       t4,       .n_posx
409*c0909341SAndroid Build Coastguard Worker
410*c0909341SAndroid Build Coastguard Worker    slli.d        t4,       t7,       2
411*c0909341SAndroid Build Coastguard Worker    add.d         t4,       t4,       t7      // pos_x * 5
412*c0909341SAndroid Build Coastguard Worker    add.d         t4,       s0,       t4      // rp_proj[pos + pos_x]
413*c0909341SAndroid Build Coastguard Worker    vstelm.w      vr3,      t4,       0,   0
414*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr11,     t4,       4,   4
415*c0909341SAndroid Build Coastguard Worker
416*c0909341SAndroid Build Coastguard Worker    addi.d        s5,       s5,       1       // x + 1
417*c0909341SAndroid Build Coastguard Worker    bge           s5,       t1,       .ret_posx
418*c0909341SAndroid Build Coastguard Worker    addi.d        a7,       a7,       5       // rb + 1
419*c0909341SAndroid Build Coastguard Worker    vld           vr4,      a7,       0       // [rb]
420*c0909341SAndroid Build Coastguard Worker    vseq.b        vr5,      vr4,      vr3
421*c0909341SAndroid Build Coastguard Worker
422*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d  t8,       vr5,      0
423*c0909341SAndroid Build Coastguard Worker    cto.d         t8,       t8
424*c0909341SAndroid Build Coastguard Worker    blt           t8,       s7,       7b
425*c0909341SAndroid Build Coastguard Worker
426*c0909341SAndroid Build Coastguard Worker    addi.d        t7,       t7,       1       // pos_x + 1
427*c0909341SAndroid Build Coastguard Worker
428*c0909341SAndroid Build Coastguard Worker    b             .loop_posx
429*c0909341SAndroid Build Coastguard Worker
430*c0909341SAndroid Build Coastguard Worker.n_posy:
431*c0909341SAndroid Build Coastguard Worker    addi.d        s5,       s5,       1       // x + 1
432*c0909341SAndroid Build Coastguard Worker    bge           s5,       t1,       .ret_posx
433*c0909341SAndroid Build Coastguard Worker    addi.d        a7,       a7,       5       // rb + 1
434*c0909341SAndroid Build Coastguard Worker    vld           vr4,      a7,       0       // [rb]
435*c0909341SAndroid Build Coastguard Worker    vseq.b        vr5,      vr4,      vr3
436*c0909341SAndroid Build Coastguard Worker
437*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d  t8,       vr5,      0
438*c0909341SAndroid Build Coastguard Worker    cto.d         t8,       t8
439*c0909341SAndroid Build Coastguard Worker    blt           t8,       s7,       7b
440*c0909341SAndroid Build Coastguard Worker
441*c0909341SAndroid Build Coastguard Worker    addi.d        s5,       s5,       1       // x + 1
442*c0909341SAndroid Build Coastguard Worker    bge           s5,       t1,       .ret_posx
443*c0909341SAndroid Build Coastguard Worker    addi.d        a7,       a7,       5       // rb + 1
444*c0909341SAndroid Build Coastguard Worker    vld           vr4,      a7,       0       // [rb]
445*c0909341SAndroid Build Coastguard Worker    vseq.b        vr5,      vr4,      vr3
446*c0909341SAndroid Build Coastguard Worker
447*c0909341SAndroid Build Coastguard Worker    vpickve2gr.d  t8,       vr5,      0
448*c0909341SAndroid Build Coastguard Worker    cto.d         t8,       t8
449*c0909341SAndroid Build Coastguard Worker    blt           t8,       s7,       7b
450*c0909341SAndroid Build Coastguard Worker
451*c0909341SAndroid Build Coastguard Worker    b             .n_posy
452*c0909341SAndroid Build Coastguard Worker
453*c0909341SAndroid Build Coastguard Worker.end_x:
454*c0909341SAndroid Build Coastguard Worker    addi.d        s5,       s5,       1       // x + 1
455*c0909341SAndroid Build Coastguard Worker    blt           s5,       t1,       7b
456*c0909341SAndroid Build Coastguard Worker
457*c0909341SAndroid Build Coastguard Worker.ret_posx:
458*c0909341SAndroid Build Coastguard Worker    add.d         s1,       s1,       t2      // r + stride
459*c0909341SAndroid Build Coastguard Worker    addi.d        s2,       s2,       1       // y + 1
460*c0909341SAndroid Build Coastguard Worker    blt           s2,       a5,       6b
461*c0909341SAndroid Build Coastguard Worker8:
462*c0909341SAndroid Build Coastguard Worker    addi.d        a6,       a6,       1       // n + 1
463*c0909341SAndroid Build Coastguard Worker    addi.d        t5,       t5,       28      // mfmv_ref2ref(offset) + 28
464*c0909341SAndroid Build Coastguard Worker    blt           a6,       s8,       5b
465*c0909341SAndroid Build Coastguard Worker
466*c0909341SAndroid Build Coastguard Worker.end_load:
467*c0909341SAndroid Build Coastguard Worker    ld.d           s0,      sp,       0
468*c0909341SAndroid Build Coastguard Worker    ld.d           s1,      sp,       8
469*c0909341SAndroid Build Coastguard Worker    ld.d           s2,      sp,       16
470*c0909341SAndroid Build Coastguard Worker    ld.d           s3,      sp,       24
471*c0909341SAndroid Build Coastguard Worker    ld.d           s4,      sp,       32
472*c0909341SAndroid Build Coastguard Worker    ld.d           s5,      sp,       40
473*c0909341SAndroid Build Coastguard Worker    ld.d           s6,      sp,       48
474*c0909341SAndroid Build Coastguard Worker    ld.d           s7,      sp,       56
475*c0909341SAndroid Build Coastguard Worker    ld.d           s8,      sp,       64
476*c0909341SAndroid Build Coastguard Worker    addi.d         sp,      sp,       80
477*c0909341SAndroid Build Coastguard Workerendfunc
478*c0909341SAndroid Build Coastguard Worker
479*c0909341SAndroid Build Coastguard Workerconst mv_tbls   // byte-shuffle tables used by save_tmvs_lsx; one 16-byte row per case (row = case * 16)
480*c0909341SAndroid Build Coastguard Worker    .byte           255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255   // case 0: nothing selected; save_tmvs_lsx zeroes every lane whose index is negative as a signed byte
481*c0909341SAndroid Build Coastguard Worker    .byte           0, 1, 2, 3, 8, 0, 1, 2, 3, 8, 0, 1, 2, 3, 8, 0   // case 1: source bytes 0-3 and 8 (mv[0], ref[0]) as a repeating 5-byte pattern
482*c0909341SAndroid Build Coastguard Worker    .byte           4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4   // case 2: source bytes 4-7 and 9 (mv[1], ref[1]) as a repeating 5-byte pattern
483*c0909341SAndroid Build Coastguard Worker    .byte           4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4, 5, 6, 7, 9, 4   // case 3: same selection as case 2
484*c0909341SAndroid Build Coastguard Workerendconst
485*c0909341SAndroid Build Coastguard Worker
486*c0909341SAndroid Build Coastguard Workerconst mask_mult   // multipliers for vmulwev.h.bu in save_tmvs_lsx (that instruction uses the even bytes)
487*c0909341SAndroid Build Coastguard Worker    .byte           1, 0, 2, 0, 1, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0   // even bytes: 1, 2, 1, 2, then zeros
488*c0909341SAndroid Build Coastguard Workerendconst
489*c0909341SAndroid Build Coastguard Worker
490*c0909341SAndroid Build Coastguard Workerconst mask_mv0   // vshuf.b index vector used in save_tmvs_lsx to build vr1/vr5
491*c0909341SAndroid Build Coastguard Worker    .byte           1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16   // consecutive byte indices 1..16
492*c0909341SAndroid Build Coastguard Workerendconst
493*c0909341SAndroid Build Coastguard Worker
494*c0909341SAndroid Build Coastguard Workerconst mask_mv1   // vshuf.b index vector used in save_tmvs_lsx to build vr2/vr6
495*c0909341SAndroid Build Coastguard Worker    .byte           4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19   // consecutive byte indices 4..19
496*c0909341SAndroid Build Coastguard Workerendconst
497*c0909341SAndroid Build Coastguard Worker
498*c0909341SAndroid Build Coastguard Worker// void dav1d_save_tmvs_lsx(refmvs_temporal_block *rp, ptrdiff_t stride,
499*c0909341SAndroid Build Coastguard Worker//                          refmvs_block **rr, const uint8_t *ref_sign,
500*c0909341SAndroid Build Coastguard Worker//                          int col_end8, int row_end8,
501*c0909341SAndroid Build Coastguard Worker//                          int col_start8, int row_start8)
//
// Register use on entry, as read by the code below: a0 = rp, a1 = stride,
// a2 = rr, a3 = ref_sign, a4 = col_end8, a5 = row_end8, a6 = col_start8,
// a7 = row_start8.
//
// For each of (row_end8 - row_start8) rows the routine walks the 12-byte
// refmvs_block candidates of rr[(y & 15) * 2] from column col_start8 up to
// col_end8 and writes 5-byte entries (4-byte mv + 1-byte ref) to rp.
// Two candidates are processed per inner iteration. For each candidate a
// case number (mv[0] condition * 1 + mv[1] condition * 2) selects a row of
// mv_tbls; the condition is "ref_sign lookup is non-zero and both mv
// components have abs() < 4096". The candidate's bs value selects, through
// .save_tevs_tbl, both the distance to the next candidate and the store
// routine (10/20/40/80/160) that replicates the entry.
502*c0909341SAndroid Build Coastguard Workerfunction save_tmvs_lsx
503*c0909341SAndroid Build Coastguard Worker    addi.d      sp,         sp,        -0x28   // 40-byte frame for s0-s4
504*c0909341SAndroid Build Coastguard Worker    st.d        s0,         sp,         0x00
505*c0909341SAndroid Build Coastguard Worker    st.d        s1,         sp,         0x08
506*c0909341SAndroid Build Coastguard Worker    st.d        s2,         sp,         0x10
507*c0909341SAndroid Build Coastguard Worker    st.d        s3,         sp,         0x18
508*c0909341SAndroid Build Coastguard Worker    st.d        s4,         sp,         0x20
509*c0909341SAndroid Build Coastguard Worker    move        t0,         ra                  // keep the caller's ra; the jirl calls below overwrite ra
510*c0909341SAndroid Build Coastguard Worker
511*c0909341SAndroid Build Coastguard Worker    vxor.v      vr10,       vr10,       vr10    // vr10 = 0
512*c0909341SAndroid Build Coastguard Worker    vld         vr11,       a3,         0       // 16-byte load starting at ref_sign[0]
513*c0909341SAndroid Build Coastguard Worker    la.local    t2,         .save_tevs_tbl      // t2 = base of the per-bs {advance, store routine} table
514*c0909341SAndroid Build Coastguard Worker    la.local    s1,         mask_mult
515*c0909341SAndroid Build Coastguard Worker    la.local    t7,         mv_tbls             // t7 = base of the permutation tables
516*c0909341SAndroid Build Coastguard Worker    vld         vr9,        s1,         0       // Load mask_mult
517*c0909341SAndroid Build Coastguard Worker    vslli.d     vr11,       vr11,       8       // 0, ref_sign[0], ... ,ref_sign[6]
518*c0909341SAndroid Build Coastguard Worker    la.local    s3,         mask_mv0
519*c0909341SAndroid Build Coastguard Worker    vld         vr8,        s3,         0       // Load mask_mv0
520*c0909341SAndroid Build Coastguard Worker    la.local    s4,         mask_mv1
521*c0909341SAndroid Build Coastguard Worker    vld         vr7,        s4,         0       // Load mask_mv1
522*c0909341SAndroid Build Coastguard Worker    li.d        s0,         5                   // bytes per rp entry (4-byte mv + 1-byte ref)
523*c0909341SAndroid Build Coastguard Worker    li.d        t8,         12 * 2              // 2 * 12 bytes: b is indexed with x * 2
524*c0909341SAndroid Build Coastguard Worker    mul.d       a1,         a1,         s0     // stride *= 5
525*c0909341SAndroid Build Coastguard Worker    sub.d       a5,         a5,         a7      // h = row_end8 - row_start8
526*c0909341SAndroid Build Coastguard Worker    slli.d      a7,         a7,         1       // row_start8 <<= 1
527*c0909341SAndroid Build Coastguard Worker1:
528*c0909341SAndroid Build Coastguard Worker    li.d        s0,         5                   // reload 5: s0 is reused as a table pointer in the column loop
529*c0909341SAndroid Build Coastguard Worker    andi        t3,         a7,         30      // (y & 15) * 2
530*c0909341SAndroid Build Coastguard Worker    slli.d      s4,         t3,         3       // byte offset into rr (8 bytes per pointer)
531*c0909341SAndroid Build Coastguard Worker    ldx.d       t3,         a2,         s4      // b = rr[(y & 15) * 2]
532*c0909341SAndroid Build Coastguard Worker    addi.d      t3,         t3,         12      // &b[... + 1]
533*c0909341SAndroid Build Coastguard Worker    mul.d       s4,         a4,         t8
534*c0909341SAndroid Build Coastguard Worker    add.d       t4,         s4,         t3      // end_cand_b = &b[col_end8*2 + 1]
535*c0909341SAndroid Build Coastguard Worker    mul.d       s3,         a6,         t8
536*c0909341SAndroid Build Coastguard Worker    add.d       t3,         s3,         t3      // cand_b = &b[x*2 + 1]
537*c0909341SAndroid Build Coastguard Worker    mul.d       s4,         a6,         s0
538*c0909341SAndroid Build Coastguard Worker    add.d       a3,         s4,         a0      // &rp[x]
539*c0909341SAndroid Build Coastguard Worker2:
540*c0909341SAndroid Build Coastguard Worker    /* First cand_b */
541*c0909341SAndroid Build Coastguard Worker    ld.b        t5,         t3,         10      // cand_b->bs
542*c0909341SAndroid Build Coastguard Worker    vld         vr0,        t3,         0       // cand_b->mv and ref
543*c0909341SAndroid Build Coastguard Worker    alsl.d      t5,         t5,         t2,     2  // t5 = table entry for bs (4 bytes per entry)
544*c0909341SAndroid Build Coastguard Worker    ld.h        s3,         t3,         8       // cand_b->ref
545*c0909341SAndroid Build Coastguard Worker    ld.h        t6,         t5,         0       // bt2: advance value from the table
546*c0909341SAndroid Build Coastguard Worker    move        s0,         t2                  // fallback table pointer when no second cand_b is read
547*c0909341SAndroid Build Coastguard Worker    alsl.d      t3,         t6,         t3,     1   // Next cand_b += bt2 * 2
548*c0909341SAndroid Build Coastguard Worker    vor.v       vr2,        vr0,        vr0     // vr2 = copy of the first cand_b (kept if there is no second one)
549*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.h vr1,        s3,         0       // first block's ref pair -> h[0]
550*c0909341SAndroid Build Coastguard Worker    move        t1 ,        t3                  // t1 = cand_b after the first block, tested after its store
551*c0909341SAndroid Build Coastguard Worker    bge         t3,         t4,        3f       // row ends after the first block: skip the second load
552*c0909341SAndroid Build Coastguard Worker
553*c0909341SAndroid Build Coastguard Worker    /* Next cand_b */
554*c0909341SAndroid Build Coastguard Worker    ld.b        s0,         t3,         10      // cand_b->bs
555*c0909341SAndroid Build Coastguard Worker    vld         vr4,        t3,         0       // cand_b->mv and ref
556*c0909341SAndroid Build Coastguard Worker    alsl.d      s0,         s0,         t2,     2 // s0 = table entry for bs (4 bytes per entry)
557*c0909341SAndroid Build Coastguard Worker    ld.h        s4,         t3,         8       // cand_b->ref
558*c0909341SAndroid Build Coastguard Worker    ld.h        t6,         s0,         0       // bt2: advance value from the table
559*c0909341SAndroid Build Coastguard Worker    alsl.d      t3,         t6,         t3,     1   // Next cand_b += bt2*2
560*c0909341SAndroid Build Coastguard Worker    vpackev.d   vr2,        vr4,        vr0     // a0.mv[0] a0.mv[1] a1.mv[0], a1.mv[1]
561*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.h vr1,        s4,         1   // a0.ref[0] a0.ref[1], a1.ref[0], a1.ref[1]
562*c0909341SAndroid Build Coastguard Worker3:
563*c0909341SAndroid Build Coastguard Worker    vabsd.h     vr2,        vr2,        vr10    // abs(mv[].xy)
564*c0909341SAndroid Build Coastguard Worker    vsle.b      vr16,       vr10,       vr1     // mask of ref bytes that are >= 0
565*c0909341SAndroid Build Coastguard Worker    vand.v      vr1,        vr16,       vr1     // negative refs become index 0
566*c0909341SAndroid Build Coastguard Worker    vshuf.b     vr1,        vr11,       vr11,   vr1     // ref_sign[ref]
567*c0909341SAndroid Build Coastguard Worker    vsrli.h     vr2,        vr2,        12      // abs(mv[].xy) >> 12
568*c0909341SAndroid Build Coastguard Worker    vilvl.b     vr1,        vr1,        vr1     // duplicate each looked-up byte into a byte pair
569*c0909341SAndroid Build Coastguard Worker    vmulwev.h.bu    vr1,    vr1,        vr9    // ref_sign[ref] * {1, 2}
570*c0909341SAndroid Build Coastguard Worker
571*c0909341SAndroid Build Coastguard Worker    vseqi.w     vr2,        vr2,        0       // both components of mv[] have abs() < 4096
572*c0909341SAndroid Build Coastguard Worker    vpickev.h   vr2,        vr2,        vr2     // abs() condition to 16 bit
573*c0909341SAndroid Build Coastguard Worker
574*c0909341SAndroid Build Coastguard Worker    vand.v      vr1,        vr2,        vr1     // h[0-3] contains conditions for mv[0-1]
575*c0909341SAndroid Build Coastguard Worker    vhaddw.wu.hu    vr1,    vr1,        vr1     // Combine condition for [1] and [0]
576*c0909341SAndroid Build Coastguard Worker    vpickve2gr.wu   s1,     vr1,        0       // Extract case for first block
577*c0909341SAndroid Build Coastguard Worker    vpickve2gr.wu   s2,     vr1,        1       // Extract case for second block
578*c0909341SAndroid Build Coastguard Worker
579*c0909341SAndroid Build Coastguard Worker    ld.hu           t5,     t5,         2       // Fetch jump table entry
580*c0909341SAndroid Build Coastguard Worker    ld.hu           s0,     s0,         2
581*c0909341SAndroid Build Coastguard Worker    alsl.d          s3,     s1,         t7,    4   // Load permutation table base on case
582*c0909341SAndroid Build Coastguard Worker    vld             vr1,    s3,         0
583*c0909341SAndroid Build Coastguard Worker    alsl.d          s4,     s2,         t7,    4
584*c0909341SAndroid Build Coastguard Worker    vld             vr5,    s4,         0
585*c0909341SAndroid Build Coastguard Worker    sub.d           t5,     t2,         t5     // Find jump table target
586*c0909341SAndroid Build Coastguard Worker    sub.d           s0,     t2,         s0
587*c0909341SAndroid Build Coastguard Worker
588*c0909341SAndroid Build Coastguard Worker    vshuf.b         vr0,    vr0,        vr0,    vr1 // Permute cand_b to output refmvs_temporal_block
589*c0909341SAndroid Build Coastguard Worker    vshuf.b         vr4,    vr4,        vr4,    vr5
590*c0909341SAndroid Build Coastguard Worker    vsle.b          vr16,   vr10,       vr1         // lanes whose table index is >= 0 as a signed byte
591*c0909341SAndroid Build Coastguard Worker    vand.v          vr0,    vr16,       vr0         // zero the lanes selected by index 255 (case 0)
592*c0909341SAndroid Build Coastguard Worker
593*c0909341SAndroid Build Coastguard Worker    vsle.b          vr17,   vr10,       vr5
594*c0909341SAndroid Build Coastguard Worker    vand.v          vr4,    vr17,       vr4
595*c0909341SAndroid Build Coastguard Worker    // v1 follows on v0, with another 3 full repetitions of the pattern.
596*c0909341SAndroid Build Coastguard Worker    vshuf.b         vr1,    vr0,        vr0,    vr8 // 1, 2, 3, ... , 15, 16
597*c0909341SAndroid Build Coastguard Worker    vshuf.b         vr5,    vr4,        vr4,    vr8 // 1, 2, 3, ... , 15, 16
598*c0909341SAndroid Build Coastguard Worker    // v2 ends with 3 complete repetitions of the pattern.
599*c0909341SAndroid Build Coastguard Worker    vshuf.b         vr2,    vr1,        vr0,    vr7
600*c0909341SAndroid Build Coastguard Worker    vshuf.b         vr6,    vr5,        vr4,    vr7    // 4, 5, 6, 7, ... , 12, 13, 14, 15, 16, 17, 18, 19
601*c0909341SAndroid Build Coastguard Worker
602*c0909341SAndroid Build Coastguard Worker    jirl            ra,     t5,         0       // call the store routine for the first block; it returns through ra
603*c0909341SAndroid Build Coastguard Worker    bge             t1 ,    t4,         4f      // if (cand_b >= end)
604*c0909341SAndroid Build Coastguard Worker    vor.v           vr0,    vr4,        vr4     // move the second block's vectors into vr0-vr2,
605*c0909341SAndroid Build Coastguard Worker    vor.v           vr1,    vr5,        vr5     // which are the registers the store routines read
606*c0909341SAndroid Build Coastguard Worker    vor.v           vr2,    vr6,        vr6
607*c0909341SAndroid Build Coastguard Worker    jirl            ra,     s0,         0       // call the store routine for the second block
608*c0909341SAndroid Build Coastguard Worker    blt             t3,     t4,         2b      // if (cand_b < end)
609*c0909341SAndroid Build Coastguard Worker
610*c0909341SAndroid Build Coastguard Worker4:
611*c0909341SAndroid Build Coastguard Worker    addi.d          a5,     a5,         -1      // h--
612*c0909341SAndroid Build Coastguard Worker    addi.d          a7,     a7,         2       // y += 2
613*c0909341SAndroid Build Coastguard Worker    add.d           a0,     a0,         a1      // rp += stride
614*c0909341SAndroid Build Coastguard Worker    blt             zero,   a5,         1b
615*c0909341SAndroid Build Coastguard Worker
616*c0909341SAndroid Build Coastguard Worker    ld.d        s0,         sp,         0x00
617*c0909341SAndroid Build Coastguard Worker    ld.d        s1,         sp,         0x08
618*c0909341SAndroid Build Coastguard Worker    ld.d        s2,         sp,         0x10
619*c0909341SAndroid Build Coastguard Worker    ld.d        s3,         sp,         0x18
620*c0909341SAndroid Build Coastguard Worker    ld.d        s4,         sp,         0x20
621*c0909341SAndroid Build Coastguard Worker    addi.d      sp,         sp,         0x28
622*c0909341SAndroid Build Coastguard Worker
623*c0909341SAndroid Build Coastguard Worker    move            ra,     t0                  // restore the caller's return address saved at entry
624*c0909341SAndroid Build Coastguard Worker    jirl            zero,   ra,         0x00    // return to the caller
625*c0909341SAndroid Build Coastguard Worker
626*c0909341SAndroid Build Coastguard Worker10:     // store routine: 1 entry (5 bytes) at a3, then a3 += 5
627*c0909341SAndroid Build Coastguard Worker    addi.d          s1,     a3,         4
628*c0909341SAndroid Build Coastguard Worker    vstelm.w        vr0,    a3,         0,      0   // .mv
629*c0909341SAndroid Build Coastguard Worker    vstelm.b        vr0,    s1,         0,      4   // .ref
630*c0909341SAndroid Build Coastguard Worker    addi.d          a3,     a3,         5
631*c0909341SAndroid Build Coastguard Worker    jirl            zero,   ra,         0x00
632*c0909341SAndroid Build Coastguard Worker20:     // store routine: 2 entries (10 bytes) at a3, then a3 += 10
633*c0909341SAndroid Build Coastguard Worker    addi.d          s1,     a3,         8
634*c0909341SAndroid Build Coastguard Worker    vstelm.d        vr0,    a3,         0,      0   // bytes 0-7 of the two entries
635*c0909341SAndroid Build Coastguard Worker    vstelm.h        vr0,    s1,         0,      4   // bytes 8-9 of the two entries
636*c0909341SAndroid Build Coastguard Worker    addi.d          a3,     a3,         2 * 5
637*c0909341SAndroid Build Coastguard Worker    jirl            zero,   ra,         0x00
638*c0909341SAndroid Build Coastguard Worker40:     // store routine: 4 entries (20 bytes) at a3, then a3 += 20
639*c0909341SAndroid Build Coastguard Worker    vst             vr0,    a3,         0           // bytes 0-15
640*c0909341SAndroid Build Coastguard Worker    vstelm.w        vr1,    a3,         0x10,   0   // bytes 16-19
641*c0909341SAndroid Build Coastguard Worker    addi.d          a3,     a3,         4 * 5
642*c0909341SAndroid Build Coastguard Worker    jirl            zero,   ra,         0x00
643*c0909341SAndroid Build Coastguard Worker
644*c0909341SAndroid Build Coastguard Worker80:     // store routine: 8 entries (40 bytes) at a3, then a3 += 40
645*c0909341SAndroid Build Coastguard Worker    vst             vr0,    a3,         0
646*c0909341SAndroid Build Coastguard Worker    vst             vr1,    a3,         0x10           // This writes 6 full entries plus 2 extra bytes
647*c0909341SAndroid Build Coastguard Worker    vst             vr2,    a3,         5 * 8 - 16     // Write the last few, overlapping with the first write.
648*c0909341SAndroid Build Coastguard Worker    addi.d          a3,     a3,         8 * 5
649*c0909341SAndroid Build Coastguard Worker    jirl            zero,   ra,         0x00
650*c0909341SAndroid Build Coastguard Worker160:    // store routine: 16 entries (80 bytes) at a3, then a3 += 80
651*c0909341SAndroid Build Coastguard Worker    addi.d          s1,     a3,         6 * 5         // NOTE(review): s1 is not read by the stores below (they address via a3/s2); looks unused, confirm
652*c0909341SAndroid Build Coastguard Worker    addi.d          s2,     a3,         12 * 5        // s2 = address of entry 12
653*c0909341SAndroid Build Coastguard Worker    vst             vr0,    a3,         0
654*c0909341SAndroid Build Coastguard Worker    vst             vr1,    a3,         0x10          // This writes 6 full entries plus 2 extra bytes
655*c0909341SAndroid Build Coastguard Worker    vst             vr0,    a3,         6 * 5
656*c0909341SAndroid Build Coastguard Worker    vst             vr1,    a3,         6 * 5 + 16    // Write another 6 full entries, slightly overlapping with the first set
657*c0909341SAndroid Build Coastguard Worker    vstelm.d        vr0,    s2,         0,      0     // Write 8 bytes (entry 12 plus 3 more bytes) after the first 12 entries
658*c0909341SAndroid Build Coastguard Worker    vst             vr2,    a3,         5 * 16 - 16   // Write the last 3 entries
659*c0909341SAndroid Build Coastguard Worker    addi.d          a3,     a3,         16 * 5
660*c0909341SAndroid Build Coastguard Worker    jirl            zero,   ra,         0x00
661*c0909341SAndroid Build Coastguard Worker
662*c0909341SAndroid Build Coastguard Worker.save_tevs_tbl:     // indexed by bs, 4 bytes per entry: {advance value, table address minus store-routine address}
663*c0909341SAndroid Build Coastguard Worker        .hword 16 * 12   // bt2 * 12, 12 is sizeof(refmvs_block)
664*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl - 160b
665*c0909341SAndroid Build Coastguard Worker        .hword 16 * 12
666*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl - 160b
667*c0909341SAndroid Build Coastguard Worker        .hword 8 * 12
668*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  80b
669*c0909341SAndroid Build Coastguard Worker        .hword 8 * 12
670*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  80b
671*c0909341SAndroid Build Coastguard Worker        .hword 8 * 12
672*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  80b
673*c0909341SAndroid Build Coastguard Worker        .hword 8 * 12
674*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  80b
675*c0909341SAndroid Build Coastguard Worker        .hword 4 * 12
676*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  40b
677*c0909341SAndroid Build Coastguard Worker        .hword 4 * 12
678*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  40b
679*c0909341SAndroid Build Coastguard Worker        .hword 4 * 12
680*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  40b
681*c0909341SAndroid Build Coastguard Worker        .hword 4 * 12
682*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  40b
683*c0909341SAndroid Build Coastguard Worker        .hword 2 * 12
684*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  20b
685*c0909341SAndroid Build Coastguard Worker        .hword 2 * 12
686*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  20b
687*c0909341SAndroid Build Coastguard Worker        .hword 2 * 12
688*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  20b
689*c0909341SAndroid Build Coastguard Worker        .hword 2 * 12
690*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  20b
691*c0909341SAndroid Build Coastguard Worker        .hword 2 * 12
692*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  20b
693*c0909341SAndroid Build Coastguard Worker        .hword 1 * 12
694*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  10b
695*c0909341SAndroid Build Coastguard Worker        .hword 1 * 12
696*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  10b
697*c0909341SAndroid Build Coastguard Worker        .hword 1 * 12
698*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  10b
699*c0909341SAndroid Build Coastguard Worker        .hword 1 * 12
700*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  10b
701*c0909341SAndroid Build Coastguard Worker        .hword 1 * 12
702*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  10b
703*c0909341SAndroid Build Coastguard Worker        .hword 1 * 12
704*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  10b
705*c0909341SAndroid Build Coastguard Worker        .hword 1 * 12
706*c0909341SAndroid Build Coastguard Worker        .hword .save_tevs_tbl -  10b
707*c0909341SAndroid Build Coastguard Workerendfunc
708*c0909341SAndroid Build Coastguard Worker
709