xref: /aosp_15_r20/external/libdav1d/src/riscv/64/mc.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/******************************************************************************
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2024, Nathan Egge, Niklas Haas, Bogdan Gligorijevic
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker *****************************************************************************/
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/riscv/asm.S"
29*c0909341SAndroid Build Coastguard Worker
# blend_vl256_8bpc_rvv: alternate entry to the blend kernel. It only builds
# the vtype seed in t0 and then continues in the body of blend_8bpc_rvv at
# L(blend_epilog). The seed base is 0xc3 where blend_8bpc_rvv uses 0xc4, so
# the LMUL code that survives the shared mask is one step (a factor of two)
# smaller for the same a3.
# NOTE(review): the vl256 suffix suggests this entry is selected on hardware
# with VLEN >= 256 - confirm against the dispatch code.
# Reads a3 (presumably the block width, a power of two). Clobbers t0.
30*c0909341SAndroid Build Coastguard Workerfunction blend_vl256_8bpc_rvv, export=1, ext=zbb
31*c0909341SAndroid Build Coastguard Worker  ctz t0, a3  # t0 = log2(a3) when a3 is a power of two
32*c0909341SAndroid Build Coastguard Worker  addi t0, t0, 0xc3  # 0xc0 = ma|ta with e8; the low bits become the LMUL code
33*c0909341SAndroid Build Coastguard Worker  j L(blend_epilog)  # continue in the shared body inside blend_8bpc_rvv
34*c0909341SAndroid Build Coastguard Workerendfunc
35*c0909341SAndroid Build Coastguard Worker
# blend_8bpc_rvv: per-pixel weighted blend of a2 into a0, two rows per loop
# iteration. For every byte x of a row:
#   a0[x] = (a2[x] * a5[x] + a0[x] * (64 - a5[x]) + 32) >> 6
# with the result narrowed to 8 bits using unsigned saturation.
# Registers as used below (the C-level names are not visible here; presumably
# dst, dst stride, tmp, w, h, mask - TODO confirm):
#   a0  pointer that is both read and written; stepped by a1 per row
#   a1  byte step between rows of a0
#   a2  first source; consecutive rows are a3 bytes apart
#   a3  requested vector length and row pitch of a2 and a5
#   a4  row counter, decremented by 2 per iteration; the loop only ends when
#       it reaches exactly zero, so it must be a positive even number
#   a5  per-pixel weights; consecutive rows are a3 bytes apart
# Clobbers t0, t1, vector registers within v0-v23, vxrm, vl and vtype.
36*c0909341SAndroid Build Coastguard Workerfunction blend_8bpc_rvv, export=1, ext="v,zbb"
37*c0909341SAndroid Build Coastguard Worker  ctz t0, a3  # t0 = log2(a3) when a3 is a power of two
38*c0909341SAndroid Build Coastguard Worker  addi t0, t0, 0xc4  # 0xc0 = ma|ta with e8; the low bits become the LMUL code
39*c0909341SAndroid Build Coastguard WorkerL(blend_epilog):  # shared entry, also reached from blend_vl256_8bpc_rvv
40*c0909341SAndroid Build Coastguard Worker  csrw vxrm, zero  # vxrm = 0 (rnu): the narrowing shifts below round to nearest
41*c0909341SAndroid Build Coastguard Worker  andi t0, t0, 0xc7  # keep ma/ta and the 3-bit LMUL field; a carry into bit 3 is dropped
42*c0909341SAndroid Build Coastguard Worker  vsetvl zero, a3, t0  # vl = min(a3, VLMAX) under the computed vtype
43*c0909341SAndroid Build Coastguard Worker  li t1, 64  # full weight; 64 - w is the weight applied to a0
44*c0909341SAndroid Build Coastguard Worker1:
45*c0909341SAndroid Build Coastguard Worker  addi a4, a4, -2  # two rows are consumed per iteration
46*c0909341SAndroid Build Coastguard Worker  vle8.v v4, (a2)  # a2 row 0
47*c0909341SAndroid Build Coastguard Worker  add a2, a2, a3
48*c0909341SAndroid Build Coastguard Worker  vle8.v v6, (a2)  # a2 row 1
49*c0909341SAndroid Build Coastguard Worker  add a2, a2, a3
50*c0909341SAndroid Build Coastguard Worker  vle8.v v8, (a5)  # weights row 0
51*c0909341SAndroid Build Coastguard Worker  add a5, a5, a3
52*c0909341SAndroid Build Coastguard Worker  vle8.v v10, (a5)  # weights row 1
53*c0909341SAndroid Build Coastguard Worker  add a5, a5, a3
54*c0909341SAndroid Build Coastguard Worker  vle8.v v0, (a0)  # a0 row 0
55*c0909341SAndroid Build Coastguard Worker  add t0, a0, a1  # t0 = address of a0 row 1
56*c0909341SAndroid Build Coastguard Worker  vle8.v v2, (t0)  # a0 row 1
57*c0909341SAndroid Build Coastguard Worker  vwmulu.vv v16, v4, v8  # 16-bit: a2 * w
58*c0909341SAndroid Build Coastguard Worker  vwmulu.vv v20, v6, v10
59*c0909341SAndroid Build Coastguard Worker  vrsub.vx v8, v8, t1  # w = 64 - w
60*c0909341SAndroid Build Coastguard Worker  vrsub.vx v10, v10, t1
61*c0909341SAndroid Build Coastguard Worker  vwmaccu.vv v16, v0, v8  # += a0 * (64 - w)
62*c0909341SAndroid Build Coastguard Worker  vwmaccu.vv v20, v2, v10
63*c0909341SAndroid Build Coastguard Worker  vnclipu.wi v0, v16, 6  # rounding shift right by 6, saturate to u8
64*c0909341SAndroid Build Coastguard Worker  vnclipu.wi v2, v20, 6
65*c0909341SAndroid Build Coastguard Worker  vse8.v v0, (a0)
66*c0909341SAndroid Build Coastguard Worker  vse8.v v2, (t0)
67*c0909341SAndroid Build Coastguard Worker  add a0, t0, a1  # advance a0 past both rows
68*c0909341SAndroid Build Coastguard Worker  bnez a4, 1b
69*c0909341SAndroid Build Coastguard Worker  ret
70*c0909341SAndroid Build Coastguard Workerendfunc
71*c0909341SAndroid Build Coastguard Worker
# blend_h_vl256_8bpc_rvv: alternate entry to the blend_h kernel. It prepares
# t0 (vtype seed) and t2 (the constant 64 that the shared body subtracts the
# row weight from) and continues at L(blend_h_epilog) in blend_h_8bpc_rvv.
# Unlike blend_h_8bpc_rvv it never takes the separate path for a3 > 64.
# The seed is ctz(a3 >> 2) + 0xc5, i.e. ctz(a3) + 0xc3 for a3 >= 4.
# NOTE(review): for a3 = 2 the shift yields 0 and ctz returns XLEN, which the
# later mask with 0xc7 appears to turn into LMUL code 5 (mf8) - confirm.
# NOTE(review): the vl256 suffix suggests VLEN >= 256 hardware - confirm.
# Reads a3. Clobbers t0 and t2.
72*c0909341SAndroid Build Coastguard Workerfunction blend_h_vl256_8bpc_rvv, export=1, ext=zbb
73*c0909341SAndroid Build Coastguard Worker  srai t0, a3, 2  # t0 = a3 / 4
74*c0909341SAndroid Build Coastguard Worker  li t2, 64  # full weight, consumed by the shared body
75*c0909341SAndroid Build Coastguard Worker  ctz t0, t0  # log2(a3) - 2 for a power-of-two a3 >= 4
76*c0909341SAndroid Build Coastguard Worker  addi t0, t0, 0xc5  # 0xc0 = ma|ta with e8; the low bits become the LMUL code
77*c0909341SAndroid Build Coastguard Worker  j L(blend_h_epilog)  # continue in the shared body inside blend_h_8bpc_rvv
78*c0909341SAndroid Build Coastguard Workerendfunc
79*c0909341SAndroid Build Coastguard Worker
# blend_h_8bpc_rvv: blends rows of a2 into a0 using one scalar weight per
# row, read from dav1d_obmc_masks starting at byte offset a4 (entry value):
#   m       = dav1d_obmc_masks[a4_in + row]
#   a0[x]   = (a2[x] * m + a0[x] * (64 - m) + 32) >> 6   (saturated to u8)
# The row counter is a4_in - (a4_in >> 2). Rows are handled in pairs and the
# loop runs while the counter is positive, so an odd count still processes
# the whole final pair.
# Registers as used below (presumably dst, dst stride, tmp, w, h - TODO
# confirm against the C prototype):
#   a0  pointer that is both read and written; stepped by a1 per row
#   a1  byte step between rows of a0
#   a2  source; consecutive rows are a3 bytes apart
#   a3  requested vector length and row pitch of a2
#   a4  on entry the mask table offset and the basis of the row counter
# When a3 > 64 the rows are covered in two passes of 64 columns each
# (columns 0-63, then 64-127): the row loop is first entered with its return
# address in t5, then re-entered with t5 = ra so that it returns to the
# caller.
# Clobbers t0-t5, a5-a7 (wide path only), vector registers within v0-v31,
# vxrm, vl and vtype.
80*c0909341SAndroid Build Coastguard Workerfunction blend_h_8bpc_rvv, export=1, ext="v,zbb"
81*c0909341SAndroid Build Coastguard Worker  li t2, 64  # full weight and the width limit of the single-pass path
82*c0909341SAndroid Build Coastguard Worker  bgt a3, t2, 128f  # rows wider than 64 take the two-pass path
83*c0909341SAndroid Build Coastguard Worker  ctz t0, a3  # t0 = log2(a3) when a3 is a power of two
84*c0909341SAndroid Build Coastguard Worker  addi t0, t0, 0xc4  # 0xc0 = ma|ta with e8; the low bits become the LMUL code
85*c0909341SAndroid Build Coastguard WorkerL(blend_h_epilog):  # shared entry, also reached from blend_h_vl256_8bpc_rvv
86*c0909341SAndroid Build Coastguard Worker  csrw vxrm, zero  # vxrm = 0 (rnu): the narrowing shifts below round to nearest
87*c0909341SAndroid Build Coastguard Worker  andi t0, t0, 0xc7  # keep ma/ta and the 3-bit LMUL field; a carry into bit 3 is dropped
88*c0909341SAndroid Build Coastguard Worker  vsetvl zero, a3, t0  # vl = min(a3, VLMAX) under the computed vtype
89*c0909341SAndroid Build Coastguard Worker  la t1, dav1d_obmc_masks
90*c0909341SAndroid Build Coastguard Worker  srai t0, a4, 2  # t0 = a4 / 4
91*c0909341SAndroid Build Coastguard Worker  add t1, t1, a4  # t1 = address of the first row weight
92*c0909341SAndroid Build Coastguard Worker  sub a4, a4, t0  # row counter = a4 - a4 / 4
93*c0909341SAndroid Build Coastguard Worker0:
94*c0909341SAndroid Build Coastguard Worker  mv t5, ra  # the row loop exits through t5; here that is the caller
95*c0909341SAndroid Build Coastguard Worker1:
96*c0909341SAndroid Build Coastguard Worker  addi a4, a4, -2  # two rows are consumed per iteration
97*c0909341SAndroid Build Coastguard Worker  lbu t3, (t1)  # weight for row 0
98*c0909341SAndroid Build Coastguard Worker  addi t1, t1, 1
99*c0909341SAndroid Build Coastguard Worker  lbu t4, (t1)  # weight for row 1
100*c0909341SAndroid Build Coastguard Worker  addi t1, t1, 1
101*c0909341SAndroid Build Coastguard Worker  vle8.v v8, (a2)  # a2 row 0
102*c0909341SAndroid Build Coastguard Worker  add a2, a2, a3
103*c0909341SAndroid Build Coastguard Worker  vle8.v v12, (a2)  # a2 row 1
104*c0909341SAndroid Build Coastguard Worker  add a2, a2, a3
105*c0909341SAndroid Build Coastguard Worker  vle8.v v0, (a0)  # a0 row 0
106*c0909341SAndroid Build Coastguard Worker  add t0, a0, a1  # t0 = address of a0 row 1
107*c0909341SAndroid Build Coastguard Worker  vle8.v v4, (t0)  # a0 row 1
108*c0909341SAndroid Build Coastguard Worker  vwmulu.vx v16, v8, t3  # 16-bit: a2 * m
109*c0909341SAndroid Build Coastguard Worker  vwmulu.vx v24, v12, t4
110*c0909341SAndroid Build Coastguard Worker  sub t3, t2, t3  # 64 - m
111*c0909341SAndroid Build Coastguard Worker  sub t4, t2, t4
112*c0909341SAndroid Build Coastguard Worker  vwmaccu.vx v16, t3, v0  # += a0 * (64 - m)
113*c0909341SAndroid Build Coastguard Worker  vwmaccu.vx v24, t4, v4
114*c0909341SAndroid Build Coastguard Worker  vnclipu.wi v0, v16, 6  # rounding shift right by 6, saturate to u8
115*c0909341SAndroid Build Coastguard Worker  vnclipu.wi v4, v24, 6
116*c0909341SAndroid Build Coastguard Worker  vse8.v v0, (a0)
117*c0909341SAndroid Build Coastguard Worker  vse8.v v4, (t0)
118*c0909341SAndroid Build Coastguard Worker  add a0, t0, a1  # advance a0 past both rows
119*c0909341SAndroid Build Coastguard Worker  bgtz a4, 1b  # signed test: the counter may end at 0 or -1
120*c0909341SAndroid Build Coastguard Worker  jr t5  # back to the caller, or to the wide path after its first pass
121*c0909341SAndroid Build Coastguard Worker128:
122*c0909341SAndroid Build Coastguard Worker  csrw vxrm, zero  # vxrm = 0 (rnu), as on the single-pass path
123*c0909341SAndroid Build Coastguard Worker  vsetvli zero, t2, e8, m4, ta, ma  # vl = min(64, VLMAX): one pass covers 64 columns
124*c0909341SAndroid Build Coastguard Worker  la t1, dav1d_obmc_masks
125*c0909341SAndroid Build Coastguard Worker  srai t0, a4, 2  # t0 = a4 / 4
126*c0909341SAndroid Build Coastguard Worker  add t1, t1, a4  # t1 = address of the first row weight
127*c0909341SAndroid Build Coastguard Worker  sub a4, a4, t0  # row counter = a4 - a4 / 4
128*c0909341SAndroid Build Coastguard Worker  mv a5, a0  # remember the start of a0 for the second pass
129*c0909341SAndroid Build Coastguard Worker  mv a6, a2  # remember the start of a2 for the second pass
130*c0909341SAndroid Build Coastguard Worker  mv a7, a4  # remember the row counter for the second pass
131*c0909341SAndroid Build Coastguard Worker  jal t5, 1b  # first pass: run the row loop with t5 as the link register
132*c0909341SAndroid Build Coastguard Worker  add t1, t1, a4  # a4 is 0 or -1 here; undo any overshoot of the weight pointer
133*c0909341SAndroid Build Coastguard Worker  add a0, a5, t2  # second pass starts 64 bytes into the first a0 row
134*c0909341SAndroid Build Coastguard Worker  add a2, a6, t2  # and 64 bytes into the first a2 row
135*c0909341SAndroid Build Coastguard Worker  mv a4, a7  # restore the row counter
136*c0909341SAndroid Build Coastguard Worker  sub t1, t1, a4  # rewind the weight pointer to the first row
137*c0909341SAndroid Build Coastguard Worker  j 0b  # second pass; label 0 sets t5 = ra so the loop returns to the caller
138*c0909341SAndroid Build Coastguard Workerendfunc
139*c0909341SAndroid Build Coastguard Worker
# blend_v_vl256_8bpc_rvv: alternate entry to the blend_v kernel. It builds
# the vtype seed ctz(a3 >> 2) + 0xc5 in t0 (equal to ctz(a3) + 0xc3 for
# a3 >= 4) and continues at L(blend_v_epilog) in blend_v_8bpc_rvv, whose own
# seed base is 0xc4, so this entry ends up with an LMUL one step smaller.
# NOTE(review): for a3 = 2 the shift yields 0 and ctz returns XLEN, which the
# later mask with 0xc7 appears to turn into LMUL code 5 (mf8) - confirm.
# NOTE(review): the vl256 suffix suggests VLEN >= 256 hardware - confirm.
# Reads a3. Clobbers t0.
140*c0909341SAndroid Build Coastguard Workerfunction blend_v_vl256_8bpc_rvv, export=1, ext=zbb
141*c0909341SAndroid Build Coastguard Worker  srai t0, a3, 2  # t0 = a3 / 4
142*c0909341SAndroid Build Coastguard Worker  ctz t0, t0  # log2(a3) - 2 for a power-of-two a3 >= 4
143*c0909341SAndroid Build Coastguard Worker  addi t0, t0, 0xc5  # 0xc0 = ma|ta with e8; the low bits become the LMUL code
144*c0909341SAndroid Build Coastguard Worker  j L(blend_v_epilog)  # continue in the shared body inside blend_v_8bpc_rvv
145*c0909341SAndroid Build Coastguard Workerendfunc
146*c0909341SAndroid Build Coastguard Worker
# blend_v_8bpc_rvv: blends rows of a2 into a0 using one weight per column.
# The weights are loaded once, as a vector, from dav1d_obmc_masks starting at
# byte offset a3, and reused for every row:
#   m[x]    = dav1d_obmc_masks[a3 + x]
#   a0[x]   = (a2[x] * m[x] + a0[x] * (64 - m[x]) + 32) >> 6   (saturated to u8)
# Registers as used below (presumably dst, dst stride, tmp, w, h - TODO
# confirm against the C prototype):
#   a0  pointer that is both read and written; stepped by a1 per row
#   a1  byte step between rows of a0
#   a2  source; consecutive rows are a3 bytes apart
#   a3  requested vector length, row pitch of a2 and mask table offset
#   a4  row counter, decremented by 2 per iteration; the loop only ends when
#       it reaches exactly zero, so it must be a positive even number
# Clobbers t0, t1, vector registers within v0-v19, vxrm, vl and vtype.
147*c0909341SAndroid Build Coastguard Workerfunction blend_v_8bpc_rvv, export=1, ext="v,zbb"
148*c0909341SAndroid Build Coastguard Worker  ctz t0, a3  # t0 = log2(a3) when a3 is a power of two
149*c0909341SAndroid Build Coastguard Worker  addi t0, t0, 0xc4  # 0xc0 = ma|ta with e8; the low bits become the LMUL code
150*c0909341SAndroid Build Coastguard WorkerL(blend_v_epilog):  # shared entry, also reached from blend_v_vl256_8bpc_rvv
151*c0909341SAndroid Build Coastguard Worker  andi t0, t0, 0xc7  # keep ma/ta and the 3-bit LMUL field; a carry into bit 3 is dropped
152*c0909341SAndroid Build Coastguard Worker  vsetvl zero, a3, t0  # vl = min(a3, VLMAX) under the computed vtype
153*c0909341SAndroid Build Coastguard Worker  csrw vxrm, zero  # vxrm = 0 (rnu): the narrowing shifts below round to nearest
154*c0909341SAndroid Build Coastguard Worker  la t1, dav1d_obmc_masks
155*c0909341SAndroid Build Coastguard Worker  add t1, t1, a3  # weights for this width start at table offset a3
156*c0909341SAndroid Build Coastguard Worker  vle8.v v8, (t1)  # v8 = m, one weight per column
157*c0909341SAndroid Build Coastguard Worker  li t0, 64
158*c0909341SAndroid Build Coastguard Worker  vrsub.vx v10, v8, t0  # v10 = 64 - m, the weight applied to a0
159*c0909341SAndroid Build Coastguard Worker1:
160*c0909341SAndroid Build Coastguard Worker  addi a4, a4, -2  # two rows are consumed per iteration
161*c0909341SAndroid Build Coastguard Worker  vle8.v v4, (a2)  # a2 row 0
162*c0909341SAndroid Build Coastguard Worker  add a2, a2, a3
163*c0909341SAndroid Build Coastguard Worker  vle8.v v6, (a2)  # a2 row 1
164*c0909341SAndroid Build Coastguard Worker  add a2, a2, a3
165*c0909341SAndroid Build Coastguard Worker  vle8.v v0, (a0)  # a0 row 0
166*c0909341SAndroid Build Coastguard Worker  add t0, a0, a1  # t0 = address of a0 row 1
167*c0909341SAndroid Build Coastguard Worker  vle8.v v2, (t0)  # a0 row 1
168*c0909341SAndroid Build Coastguard Worker  vwmulu.vv v12, v4, v8  # 16-bit: a2 * m
169*c0909341SAndroid Build Coastguard Worker  vwmulu.vv v16, v6, v8
170*c0909341SAndroid Build Coastguard Worker  vwmaccu.vv v12, v0, v10  # += a0 * (64 - m)
171*c0909341SAndroid Build Coastguard Worker  vwmaccu.vv v16, v2, v10
172*c0909341SAndroid Build Coastguard Worker  vnclipu.wi v0, v12, 6  # rounding shift right by 6, saturate to u8
173*c0909341SAndroid Build Coastguard Worker  vnclipu.wi v2, v16, 6
174*c0909341SAndroid Build Coastguard Worker  vse8.v v0, (a0)
175*c0909341SAndroid Build Coastguard Worker  vse8.v v2, (t0)
176*c0909341SAndroid Build Coastguard Worker  add a0, t0, a1  # advance a0 past both rows
177*c0909341SAndroid Build Coastguard Worker  bnez a4, 1b
178*c0909341SAndroid Build Coastguard Worker  ret
179*c0909341SAndroid Build Coastguard Workerendfunc
180*c0909341SAndroid Build Coastguard Worker
# avg va, vb, vm: combine step used by bidir_fn for the plain average.
# Leaves va + vb in va. The vm operand is accepted so that all three combine
# macros share one call shape, but it is not used here.
181*c0909341SAndroid Build Coastguard Worker.macro avg va, vb, vm
182*c0909341SAndroid Build Coastguard Worker    vadd.vv \va, \va, \vb
183*c0909341SAndroid Build Coastguard Worker.endm
184*c0909341SAndroid Build Coastguard Worker
# w_avg va, vb, vm: combine step used by bidir_fn for the weighted average.
# Computes va * a6 + vb * a7 at twice the element width in v24, then narrows
# it back into va with a signed, saturating, rounding shift right by 8.
# Expects the two scalar weights in a6 and a7 (bidir_fn sets a7 = 16 - a6).
# The vm operand is unused. Clobbers the v24 register group.
185*c0909341SAndroid Build Coastguard Worker.macro w_avg va, vb, vm
186*c0909341SAndroid Build Coastguard Worker    vwmul.vx v24, \va, a6
187*c0909341SAndroid Build Coastguard Worker    vwmacc.vx v24, a7, \vb
188*c0909341SAndroid Build Coastguard Worker    vnclip.wi \va, v24, 8
189*c0909341SAndroid Build Coastguard Worker.endm
190*c0909341SAndroid Build Coastguard Worker
# mask va, vb, vm: combine step used by bidir_fn for the per-pixel mask.
# Computes va * vm + vb * (a7 - vm) at twice the element width in v24, then
# narrows it back into va with a signed, saturating, rounding shift right by
# 10. Expects the full weight in a7 (bidir_fn sets a7 = 64).
# Clobbers vm (overwritten with a7 - vm) and the v24 register group.
191*c0909341SAndroid Build Coastguard Worker.macro mask va, vb, vm
192*c0909341SAndroid Build Coastguard Worker    vwmul.vv v24, \va, \vm
193*c0909341SAndroid Build Coastguard Worker    vrsub.vx \vm, \vm, a7
194*c0909341SAndroid Build Coastguard Worker    vwmacc.vv v24, \vb, \vm
195*c0909341SAndroid Build Coastguard Worker    vnclip.wi \va, v24, 10
196*c0909341SAndroid Build Coastguard Worker.endm
197*c0909341SAndroid Build Coastguard Worker
# bidir_fn type, shift: emits the function <type>_8bpc_rvv, where type names
# one of the combine macros avg, w_avg or mask. The function reads two
# streams of 16-bit values (a2 and a3), combines them with the chosen macro,
# clamps negative results to zero and narrows them to bytes with an unsigned
# saturating rounding shift right by the shift argument, storing into a0.
# Registers as used below (presumably dst, dst stride, tmp1, tmp2, w, h and a
# weight or mask pointer - TODO confirm against the C prototypes):
#   a0  output pointer; rows are a1 bytes apart
#   a2  first 16-bit input; rows are packed, a4 elements (2 * a4 bytes) each
#   a3  second 16-bit input, same layout as a2
#   a4  elements per row
#   a5  number of rows
#   a6  w_avg: scalar weight; mask: pointer to 8-bit weights, a4 per row
#   a7  scratch for the second weight (w_avg: 16 - a6, mask: 64)
# Three paths are selected from a4:
#   label 4   a4 == 4: the input is consumed as one flat stream and every
#             4-byte output row is written as one element of a strided
#             32-bit store
#   label 1   a row fits a register group: four rows per loop iteration, so
#             a5 must be a positive multiple of 4 for the loop to end
#   label 2   a4 is at least 4 * vlenb: each row is processed in chunks
# MA, TA, E8 and E16 are not defined in this part of the file; presumably
# they are the vtype field constants from src/riscv/asm.S.
198*c0909341SAndroid Build Coastguard Worker.macro bidir_fn type, shift
199*c0909341SAndroid Build Coastguard Workerfunction \type\()_8bpc_rvv, export=1, ext="v,zba,zbb"
200*c0909341SAndroid Build Coastguard Worker.ifc \type, w_avg
201*c0909341SAndroid Build Coastguard Worker    li a7, 16
202*c0909341SAndroid Build Coastguard Worker    sub a7, a7, a6  # second weight = 16 - a6
203*c0909341SAndroid Build Coastguard Worker.endif
204*c0909341SAndroid Build Coastguard Worker.ifc \type, mask
205*c0909341SAndroid Build Coastguard Worker    li a7, 64  # full mask weight
206*c0909341SAndroid Build Coastguard Worker.endif
207*c0909341SAndroid Build Coastguard Worker    li t0, 4
208*c0909341SAndroid Build Coastguard Worker    csrw vxrm, zero  # vxrm = 0 (rnu): fixed-point shifts round to nearest
209*c0909341SAndroid Build Coastguard Worker    beq t0, a4, 4f  # rows of four elements use the strided-store path
210*c0909341SAndroid Build Coastguard Worker    csrr t0, vlenb  # vector register size in bytes
211*c0909341SAndroid Build Coastguard Worker    ctz t1, a4  # log2(a4) for a power-of-two a4
212*c0909341SAndroid Build Coastguard Worker    ctz t0, t0  # log2(vlenb)
213*c0909341SAndroid Build Coastguard Worker    li t2, 1
214*c0909341SAndroid Build Coastguard Worker    sub t0, t1, t0  # t0 = log2(a4 / vlenb) = log2 of the e8 LMUL holding one row
215*c0909341SAndroid Build Coastguard Worker    li t4, -3
216*c0909341SAndroid Build Coastguard Worker    bgt t0, t2, 2f  # e8 LMUL above 2 (e16 above 4): take the chunked path
217*c0909341SAndroid Build Coastguard Worker    max t0, t0, t4  # clamp at -3, the smallest LMUL exponent used here (mf8)
218*c0909341SAndroid Build Coastguard Worker    andi t1, t0, 0x7  # low three bits of the exponent form the LMUL field
219*c0909341SAndroid Build Coastguard Worker    addi t0, t1, 1 # may overflow into E16 bit
220*c0909341SAndroid Build Coastguard Worker    ori t0, t0, MA | TA | E16  # t0 = vtype for the 16-bit stage, LMUL doubled
221*c0909341SAndroid Build Coastguard Worker    ori t1, t1, MA | TA | E8  # t1 = vtype for the 8-bit output stage
222*c0909341SAndroid Build Coastguard Worker1:
223*c0909341SAndroid Build Coastguard Worker    addi a5, a5, -4  # the unrolled body below handles 2 x 2 rows
224*c0909341SAndroid Build Coastguard Worker.rept 2
225*c0909341SAndroid Build Coastguard Worker    vsetvl zero, a4, t0  # 16-bit stage, vl = min(a4, VLMAX)
226*c0909341SAndroid Build Coastguard Worker    sh1add t3, a4, a2  # t3 = a2 + 2 * a4: next row of the first input
227*c0909341SAndroid Build Coastguard Worker    vle16.v v0, (a2)
228*c0909341SAndroid Build Coastguard Worker    sh1add a2, a4, t3  # advance the first input by two rows
229*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (t3)
230*c0909341SAndroid Build Coastguard Worker    sh1add t3, a4, a3  # same for the second input
231*c0909341SAndroid Build Coastguard Worker    vle16.v v8, (a3)
232*c0909341SAndroid Build Coastguard Worker    sh1add a3, a4, t3
233*c0909341SAndroid Build Coastguard Worker    vle16.v v12, (t3)
234*c0909341SAndroid Build Coastguard Worker.ifc \type, mask
235*c0909341SAndroid Build Coastguard Worker    add t3, a4, a6  # mask rows are a4 bytes apart
236*c0909341SAndroid Build Coastguard Worker    vle8.v v24, (a6)
237*c0909341SAndroid Build Coastguard Worker    add a6, a4, t3
238*c0909341SAndroid Build Coastguard Worker    vle8.v v26, (t3)
239*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v16, v24  # widen the 8-bit weights to 16 bits
240*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v20, v26
241*c0909341SAndroid Build Coastguard Worker.endif
242*c0909341SAndroid Build Coastguard Worker    \type v0, v8, v16
243*c0909341SAndroid Build Coastguard Worker    \type v4, v12, v20
244*c0909341SAndroid Build Coastguard Worker    vmax.vx v8, v0, zero  # clamp negatives before the unsigned narrowing
245*c0909341SAndroid Build Coastguard Worker    vmax.vx v12, v4, zero
246*c0909341SAndroid Build Coastguard Worker    vsetvl zero, zero, t1  # 8-bit stage, vl unchanged
247*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v0, v8,  \shift
248*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v2, v12, \shift
249*c0909341SAndroid Build Coastguard Worker    add t3, a1, a0  # t3 = second output row
250*c0909341SAndroid Build Coastguard Worker    vse8.v v0, (a0)
251*c0909341SAndroid Build Coastguard Worker    add a0, a1, t3  # advance the output by two rows
252*c0909341SAndroid Build Coastguard Worker    vse8.v v2, (t3)
253*c0909341SAndroid Build Coastguard Worker.endr
254*c0909341SAndroid Build Coastguard Worker    bnez a5, 1b
255*c0909341SAndroid Build Coastguard Worker    ret
256*c0909341SAndroid Build Coastguard Worker2:
257*c0909341SAndroid Build Coastguard Worker    mv t0, a0  # t0 = output cursor within the current row
258*c0909341SAndroid Build Coastguard Worker    neg t4, a4  # t4 = elements done minus a4; the row is complete at zero
259*c0909341SAndroid Build Coastguard Worker    add a0, a1, a0  # a0 = start of the next output row
260*c0909341SAndroid Build Coastguard Worker    addi a5, a5, -1  # one row per pass through label 2
261*c0909341SAndroid Build Coastguard Worker20:
262*c0909341SAndroid Build Coastguard Worker    vsetvli t2, a4, e16, m4, ta, ma  # t2 = elements per vector group
263*c0909341SAndroid Build Coastguard Worker    sh1add t4, t2, t4  # each pass consumes 2 * t2 elements
264*c0909341SAndroid Build Coastguard Worker    sh1add t3, t2, a2  # t3 = a2 + 2 * t2 bytes: second group of the first input
265*c0909341SAndroid Build Coastguard Worker    vle16.v v0, (a2)
266*c0909341SAndroid Build Coastguard Worker    sh1add a2, t2, t3
267*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (t3)
268*c0909341SAndroid Build Coastguard Worker    sh1add t3, t2, a3  # same for the second input
269*c0909341SAndroid Build Coastguard Worker    vle16.v v8, (a3)
270*c0909341SAndroid Build Coastguard Worker    sh1add a3, t2, t3
271*c0909341SAndroid Build Coastguard Worker    vle16.v v12, (t3)
272*c0909341SAndroid Build Coastguard Worker.ifc \type, mask
273*c0909341SAndroid Build Coastguard Worker    add t3, t2, a6  # mask bytes advance by t2 per group
274*c0909341SAndroid Build Coastguard Worker    vle8.v v24, (a6)
275*c0909341SAndroid Build Coastguard Worker    add a6, t2, t3
276*c0909341SAndroid Build Coastguard Worker    vle8.v v26, (t3)
277*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v16, v24  # widen the 8-bit weights to 16 bits
278*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v20, v26
279*c0909341SAndroid Build Coastguard Worker.endif
280*c0909341SAndroid Build Coastguard Worker    \type v0, v8, v16
281*c0909341SAndroid Build Coastguard Worker    \type v4, v12, v20
282*c0909341SAndroid Build Coastguard Worker    vmax.vx v8, v0, zero  # clamp negatives before the unsigned narrowing
283*c0909341SAndroid Build Coastguard Worker    vmax.vx v12, v4, zero
284*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, m2, ta, ma  # 8-bit stage, vl unchanged
285*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v0, v8,  \shift
286*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v2, v12, \shift
287*c0909341SAndroid Build Coastguard Worker    add t3, t2, t0  # second group lands t2 bytes further along the row
288*c0909341SAndroid Build Coastguard Worker    vse8.v v0, (t0)
289*c0909341SAndroid Build Coastguard Worker    add t0, t2, t3
290*c0909341SAndroid Build Coastguard Worker    vse8.v v2, (t3)
291*c0909341SAndroid Build Coastguard Worker    bnez t4, 20b  # more chunks left in this row
292*c0909341SAndroid Build Coastguard Worker    bnez a5, 2b  # more rows left
293*c0909341SAndroid Build Coastguard Worker    ret
294*c0909341SAndroid Build Coastguard Worker4:
295*c0909341SAndroid Build Coastguard Worker    slli t0, a5, 2  # elements left = 4 * rows left
296*c0909341SAndroid Build Coastguard Worker    vsetvli t1, t0, e16, m4, ta, ma  # t1 = elements handled in this pass
297*c0909341SAndroid Build Coastguard Worker    vle16.v v0, (a2)
298*c0909341SAndroid Build Coastguard Worker    sh1add a2, t1, a2  # advance by t1 16-bit elements
299*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (a3)
300*c0909341SAndroid Build Coastguard Worker    sh1add a3, t1, a3
301*c0909341SAndroid Build Coastguard Worker.ifc \type, mask
302*c0909341SAndroid Build Coastguard Worker    vle8.v v16, (a6)
303*c0909341SAndroid Build Coastguard Worker    add a6, t1, a6
304*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v8, v16  # widen the 8-bit weights to 16 bits
305*c0909341SAndroid Build Coastguard Worker.endif
306*c0909341SAndroid Build Coastguard Worker    \type v0, v4, v8
307*c0909341SAndroid Build Coastguard Worker    vmax.vx v8, v0, zero  # clamp negatives before the unsigned narrowing
308*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, m2, ta, ma  # 8-bit stage, vl unchanged
309*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v0, v8, \shift
310*c0909341SAndroid Build Coastguard Worker    vsetvli t1, a5, e32, m2, ta, ma  # view the bytes as 32-bit rows; t1 = rows in this pass
311*c0909341SAndroid Build Coastguard Worker    vsse32.v v0, (a0), a1  # one 4-byte row per element, a1 bytes apart
312*c0909341SAndroid Build Coastguard Worker    ctz t0, t1  # log2(t1); exact only when t1 is a power of two
313*c0909341SAndroid Build Coastguard Worker    sub a5, a5, t1  # rows left
314*c0909341SAndroid Build Coastguard Worker    sll t0, a1, t0  # a1 * t1 for a power-of-two t1
315*c0909341SAndroid Build Coastguard Worker    add a0, t0, a0  # advance the output past the rows just written
316*c0909341SAndroid Build Coastguard Worker    bnez a5, 4b
317*c0909341SAndroid Build Coastguard Worker    ret
318*c0909341SAndroid Build Coastguard Workerendfunc
319*c0909341SAndroid Build Coastguard Worker.endm
320*c0909341SAndroid Build Coastguard Worker
# Instantiate bidir_fn once per combine macro. The second argument is the
# final unsigned narrowing shift: avg still needs its whole shift (5) at that
# point, while w_avg and mask already shift inside their combine macros (by 8
# and 10) and therefore pass 0.
321*c0909341SAndroid Build Coastguard Workerbidir_fn avg,   5
322*c0909341SAndroid Build Coastguard Workerbidir_fn w_avg, 0
323*c0909341SAndroid Build Coastguard Workerbidir_fn mask,  0
324*c0909341SAndroid Build Coastguard Worker
# warp_8x8_8bpc_rvv: two-pass 8-tap filter producing an 8 x 8 block of bytes.
# Every output column uses its own 8-tap filter, looked up in
# dav1d_mc_warp_filter (8 bytes per filter, signed taps) at index
#   64 + ((pos + step * x + 512) >> 10)
# Pass 1 filters 15 source rows along the row direction and stores 15 rows of
# eight 16-bit values in a 240-byte stack buffer. Pass 2 filters that buffer
# down the columns and writes 8 rows of 8 bytes to a0.
# Registers as used below (presumably dst, dst stride, src, src stride, the
# four warp parameters, mx, my - TODO confirm against the C prototype):
#   a0  output pointer; rows are a1 bytes apart
#   a2  source pointer; moved back by 3 rows and 3 bytes before use
#   a3  byte step between source rows
#   a4  pointer to four 16-bit values: pass 1 per-column and per-row steps,
#       then pass 2 per-column and per-row steps
#   a5  pass 1 filter position for the first row, column 0
#   a6  pass 2 filter position for the first row, column 0
# Uses vl = 8 throughout, so VLEN must be at least 128 bits.
# Clobbers t0-t2, t4-t6, a0, a2, a3, a5, a6, v2-v17, v24-v25, v28-v30, vxrm,
# vl and vtype. The stack pointer is restored before returning.
325*c0909341SAndroid Build Coastguard Workerfunction warp_8x8_8bpc_rvv, export=1, ext="v"
326*c0909341SAndroid Build Coastguard Worker    csrw vxrm, zero  # vxrm = 0 (rnu): fixed-point shifts round to nearest
327*c0909341SAndroid Build Coastguard Worker
328*c0909341SAndroid Build Coastguard Worker    vsetivli zero, 8, e16, m1, ta, ma  # eight columns per row from here on
329*c0909341SAndroid Build Coastguard Worker    addi sp, sp, -2*15*8  # 15 rows x 8 columns x 2 bytes of intermediates
330*c0909341SAndroid Build Coastguard Worker    mv t5, sp  # t5 = write cursor into the intermediate buffer
331*c0909341SAndroid Build Coastguard Worker    li t0, 3
332*c0909341SAndroid Build Coastguard Worker    mul t0, a3, t0
333*c0909341SAndroid Build Coastguard Worker    sub a2, a2, t0  # start 3 rows above the given source position
334*c0909341SAndroid Build Coastguard Worker    addi a2, a2, -3  # and 3 bytes to its left
335*c0909341SAndroid Build Coastguard Worker
336*c0909341SAndroid Build Coastguard Worker    li t0, 64  # bias added to every filter index
337*c0909341SAndroid Build Coastguard Worker    addi a3, a3, -8  # the tap loop already advances a2 by 8 per row
338*c0909341SAndroid Build Coastguard Worker    li t1, 15  # pass 1 row counter
339*c0909341SAndroid Build Coastguard Worker    la t2, dav1d_mc_warp_filter
340*c0909341SAndroid Build Coastguard Worker
341*c0909341SAndroid Build Coastguard Worker    lh t6, (a4)  # pass 1 step between columns
342*c0909341SAndroid Build Coastguard Worker    lh t4, 2(a4)  # pass 1 step between rows
343*c0909341SAndroid Build Coastguard Worker    vid.v v30  # v30 = column index 0..7
344*c0909341SAndroid Build Coastguard Worker    vwmul.vx v28, v30, t6  # v28 = 32-bit per-column offset x * step
345*c0909341SAndroid Build Coastguard Worker1:
346*c0909341SAndroid Build Coastguard Worker    addi t1, t1, -1
347*c0909341SAndroid Build Coastguard Worker
348*c0909341SAndroid Build Coastguard Worker
349*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, ma  # index arithmetic in 32 bits
350*c0909341SAndroid Build Coastguard Worker    vadd.vx v4, v28, a5  # position of each column in this row
351*c0909341SAndroid Build Coastguard Worker    add a5, a5, t4  # position for the next row
352*c0909341SAndroid Build Coastguard Worker    vssra.vi v2, v4, 10  # rounding shift right by 10
353*c0909341SAndroid Build Coastguard Worker    vadd.vx v2, v2, t0  # + 64 = filter index
354*c0909341SAndroid Build Coastguard Worker    vsll.vi v24, v2, 3  # * 8 = byte offset of the filter in the table
355*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
356*c0909341SAndroid Build Coastguard Worker
357*c0909341SAndroid Build Coastguard Worker    vluxseg8ei32.v v2, (t2), v24  # tap k of each column filter goes to v(2+k)
358*c0909341SAndroid Build Coastguard Worker
359*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
360*c0909341SAndroid Build Coastguard Worker.irp i, 2, 3, 4, 5, 6, 7, 8, 9
361*c0909341SAndroid Build Coastguard Worker    vle8.v v10, (a2)  # 8 source bytes, shifted by one byte per tap
362*c0909341SAndroid Build Coastguard Worker    addi a2, a2, 1
363*c0909341SAndroid Build Coastguard Worker
364*c0909341SAndroid Build Coastguard Worker    vsext.vf2 v14, v\i
365*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v16, v10
366*c0909341SAndroid Build Coastguard Worker
367*c0909341SAndroid Build Coastguard Worker.if \i == 2
368*c0909341SAndroid Build Coastguard Worker    vwmulsu.vv v12, v14, v16
369*c0909341SAndroid Build Coastguard Worker.else
370*c0909341SAndroid Build Coastguard Worker    vwmaccsu.vv v12, v14, v16
371*c0909341SAndroid Build Coastguard Worker.endif
372*c0909341SAndroid Build Coastguard Worker.endr
373*c0909341SAndroid Build Coastguard Worker    vnclip.wi v10, v12, 3  # rounding shift right by 3, saturate to 16 bits
374*c0909341SAndroid Build Coastguard Worker
375*c0909341SAndroid Build Coastguard Worker    add a2, a2, a3  # next source row (a3 was reduced by 8 above)
376*c0909341SAndroid Build Coastguard Worker    vse16.v v10, (t5)  # store one row of intermediates
377*c0909341SAndroid Build Coastguard Worker    addi t5, t5, 16
378*c0909341SAndroid Build Coastguard Worker
379*c0909341SAndroid Build Coastguard Worker    bnez t1, 1b
380*c0909341SAndroid Build Coastguard Worker
381*c0909341SAndroid Build Coastguard Worker    mv t5, sp  # t5 = read cursor, back at the first intermediate row
382*c0909341SAndroid Build Coastguard Worker    li t1, 8  # pass 2 row counter
383*c0909341SAndroid Build Coastguard Worker
384*c0909341SAndroid Build Coastguard Worker    lh t6, 4(a4)  # pass 2 step between columns
385*c0909341SAndroid Build Coastguard Worker    lh t4, 6(a4)  # pass 2 step between rows
386*c0909341SAndroid Build Coastguard Worker    vwmul.vx v28, v30, t6  # v28 = 32-bit per-column offset x * step
387*c0909341SAndroid Build Coastguard Worker2:
388*c0909341SAndroid Build Coastguard Worker    addi t1, t1, -1
389*c0909341SAndroid Build Coastguard Worker
390*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, ma  # index arithmetic in 32 bits
391*c0909341SAndroid Build Coastguard Worker    vadd.vx v4, v28, a6  # position of each column in this row
392*c0909341SAndroid Build Coastguard Worker
393*c0909341SAndroid Build Coastguard Worker    add a6, a6, t4  # position for the next row
394*c0909341SAndroid Build Coastguard Worker    vssra.vi v2, v4, 10  # rounding shift right by 10
395*c0909341SAndroid Build Coastguard Worker    vadd.vx v2, v2, t0  # + 64 = filter index
396*c0909341SAndroid Build Coastguard Worker    vsll.vi v24, v2, 3  # * 8 = byte offset of the filter in the table
397*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
398*c0909341SAndroid Build Coastguard Worker
399*c0909341SAndroid Build Coastguard Worker    vluxseg8ei32.v v2, (t2), v24  # tap k of each column filter goes to v(2+k)
400*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
401*c0909341SAndroid Build Coastguard Worker
402*c0909341SAndroid Build Coastguard Worker.irp i, 2, 3, 4, 5, 6, 7, 8, 9
403*c0909341SAndroid Build Coastguard Worker    vle16.v v10, (t5)  # intermediate row for this tap
404*c0909341SAndroid Build Coastguard Worker    addi t5, t5, 16
405*c0909341SAndroid Build Coastguard Worker
406*c0909341SAndroid Build Coastguard Worker    vsext.vf2 v14, v\i
407*c0909341SAndroid Build Coastguard Worker
408*c0909341SAndroid Build Coastguard Worker.if \i == 2
409*c0909341SAndroid Build Coastguard Worker    vwmul.vv v12, v14, v10
410*c0909341SAndroid Build Coastguard Worker.else
411*c0909341SAndroid Build Coastguard Worker    vwmacc.vv v12, v14, v10
412*c0909341SAndroid Build Coastguard Worker.endif
413*c0909341SAndroid Build Coastguard Worker.endr
414*c0909341SAndroid Build Coastguard Worker    addi t5, t5, -16*7  # eight rows were read; net advance is one row
415*c0909341SAndroid Build Coastguard Worker    vnclip.wi v10, v12, 11  # rounding shift right by 11, saturate to 16 bits
416*c0909341SAndroid Build Coastguard Worker
417*c0909341SAndroid Build Coastguard Worker    vmax.vx v10, v10, zero  # clamp negatives before the unsigned narrowing
418*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
419*c0909341SAndroid Build Coastguard Worker
420*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v12, v10, 0  # saturate to u8 without shifting
421*c0909341SAndroid Build Coastguard Worker
422*c0909341SAndroid Build Coastguard Worker    vse8.v v12, (a0)  # one output row of 8 bytes
423*c0909341SAndroid Build Coastguard Worker    add a0, a0, a1
424*c0909341SAndroid Build Coastguard Worker
425*c0909341SAndroid Build Coastguard Worker    bnez t1, 2b
426*c0909341SAndroid Build Coastguard Worker
427*c0909341SAndroid Build Coastguard Worker    addi sp, sp, 2*15*8  # release the intermediate buffer
428*c0909341SAndroid Build Coastguard Worker
429*c0909341SAndroid Build Coastguard Worker    ret
430*c0909341SAndroid Build Coastguard Workerendfunc
431*c0909341SAndroid Build Coastguard Worker
# warp_8x8t_8bpc_rvv: two-pass 8-tap filter producing an 8 x 8 block of
# 16-bit values. Every output column uses its own 8-tap filter, looked up in
# dav1d_mc_warp_filter (8 bytes per filter, signed taps) at index
#   64 + ((pos + step * x + 512) >> 10)
# Pass 1 filters 15 source rows along the row direction and stores 15 rows of
# eight 16-bit values in a 240-byte stack buffer. Pass 2 filters that buffer
# down the columns, applies a rounding shift right by 7 and stores the signed
# 16-bit results directly, without clamping to the byte range.
# Registers as used below (presumably tmp, tmp stride, src, src stride, the
# four warp parameters, mx, my - TODO confirm against the C prototype):
#   a0  output pointer to 16-bit elements
#   a1  output row step in 16-bit elements (a0 advances by 2 * a1 bytes)
#   a2  source pointer; moved back by 3 rows and 3 bytes before use
#   a3  byte step between source rows
#   a4  pointer to four 16-bit values: pass 1 per-column and per-row steps,
#       then pass 2 per-column and per-row steps
#   a5  pass 1 filter position for the first row, column 0
#   a6  pass 2 filter position for the first row, column 0
# Uses vl = 8 throughout, so VLEN must be at least 128 bits.
# Clobbers t0-t2, t4-t6, a0, a2, a3, a5, a6, v2-v17, v24-v25, v28-v30, vxrm,
# vl and vtype. The stack pointer is restored before returning.
432*c0909341SAndroid Build Coastguard Workerfunction warp_8x8t_8bpc_rvv, export=1, ext="v,zba"
433*c0909341SAndroid Build Coastguard Worker    csrw vxrm, zero  # vxrm = 0 (rnu): fixed-point shifts round to nearest
434*c0909341SAndroid Build Coastguard Worker
435*c0909341SAndroid Build Coastguard Worker    vsetivli zero, 8, e16, m1, ta, ma  # eight columns per row from here on
436*c0909341SAndroid Build Coastguard Worker    addi sp, sp, -2*15*8  # 15 rows x 8 columns x 2 bytes of intermediates
437*c0909341SAndroid Build Coastguard Worker    mv t5, sp  # t5 = write cursor into the intermediate buffer
438*c0909341SAndroid Build Coastguard Worker    li t0, 3
439*c0909341SAndroid Build Coastguard Worker    mul t0, a3, t0
440*c0909341SAndroid Build Coastguard Worker    sub a2, a2, t0  # start 3 rows above the given source position
441*c0909341SAndroid Build Coastguard Worker    addi a2, a2, -3  # and 3 bytes to its left
442*c0909341SAndroid Build Coastguard Worker
443*c0909341SAndroid Build Coastguard Worker    li t0, 64  # bias added to every filter index
444*c0909341SAndroid Build Coastguard Worker    addi a3, a3, -8  # the tap loop already advances a2 by 8 per row
445*c0909341SAndroid Build Coastguard Worker    li t1, 15  # pass 1 row counter
446*c0909341SAndroid Build Coastguard Worker    la t2, dav1d_mc_warp_filter
447*c0909341SAndroid Build Coastguard Worker
448*c0909341SAndroid Build Coastguard Worker    lh t6, (a4)  # pass 1 step between columns
449*c0909341SAndroid Build Coastguard Worker    lh t4, 2(a4)  # pass 1 step between rows
450*c0909341SAndroid Build Coastguard Worker    vid.v v30  # v30 = column index 0..7
451*c0909341SAndroid Build Coastguard Worker    vwmul.vx v28, v30, t6  # v28 = 32-bit per-column offset x * step
452*c0909341SAndroid Build Coastguard Worker1:
453*c0909341SAndroid Build Coastguard Worker    addi t1, t1, -1
454*c0909341SAndroid Build Coastguard Worker
455*c0909341SAndroid Build Coastguard Worker
456*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, ma  # index arithmetic in 32 bits
457*c0909341SAndroid Build Coastguard Worker    vadd.vx v4, v28, a5  # position of each column in this row
458*c0909341SAndroid Build Coastguard Worker    add a5, a5, t4  # position for the next row
459*c0909341SAndroid Build Coastguard Worker    vssra.vi v2, v4, 10  # rounding shift right by 10
460*c0909341SAndroid Build Coastguard Worker    vadd.vx v2, v2, t0  # + 64 = filter index
461*c0909341SAndroid Build Coastguard Worker    vsll.vi v24, v2, 3  # * 8 = byte offset of the filter in the table
462*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
463*c0909341SAndroid Build Coastguard Worker
464*c0909341SAndroid Build Coastguard Worker    vluxseg8ei32.v v2, (t2), v24  # tap k of each column filter goes to v(2+k)
465*c0909341SAndroid Build Coastguard Worker
466*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
467*c0909341SAndroid Build Coastguard Worker.irp i, 2, 3, 4, 5, 6, 7, 8, 9
468*c0909341SAndroid Build Coastguard Worker    vle8.v v10, (a2)  # 8 source bytes, shifted by one byte per tap
469*c0909341SAndroid Build Coastguard Worker    addi a2, a2, 1
470*c0909341SAndroid Build Coastguard Worker
471*c0909341SAndroid Build Coastguard Worker    vsext.vf2 v14, v\i
472*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v16, v10
473*c0909341SAndroid Build Coastguard Worker
474*c0909341SAndroid Build Coastguard Worker.if \i == 2
475*c0909341SAndroid Build Coastguard Worker    vwmulsu.vv v12, v14, v16
476*c0909341SAndroid Build Coastguard Worker.else
477*c0909341SAndroid Build Coastguard Worker    vwmaccsu.vv v12, v14, v16
478*c0909341SAndroid Build Coastguard Worker.endif
479*c0909341SAndroid Build Coastguard Worker.endr
480*c0909341SAndroid Build Coastguard Worker    vnclip.wi v10, v12, 3  # rounding shift right by 3, saturate to 16 bits
481*c0909341SAndroid Build Coastguard Worker
482*c0909341SAndroid Build Coastguard Worker    add a2, a2, a3  # next source row (a3 was reduced by 8 above)
483*c0909341SAndroid Build Coastguard Worker    vse16.v v10, (t5)  # store one row of intermediates
484*c0909341SAndroid Build Coastguard Worker    addi t5, t5, 16
485*c0909341SAndroid Build Coastguard Worker
486*c0909341SAndroid Build Coastguard Worker    bnez t1, 1b
487*c0909341SAndroid Build Coastguard Worker
488*c0909341SAndroid Build Coastguard Worker    mv t5, sp  # t5 = read cursor, back at the first intermediate row
489*c0909341SAndroid Build Coastguard Worker    li t1, 8  # pass 2 row counter
490*c0909341SAndroid Build Coastguard Worker
491*c0909341SAndroid Build Coastguard Worker    lh t6, 4(a4)  # pass 2 step between columns
492*c0909341SAndroid Build Coastguard Worker    lh t4, 6(a4)  # pass 2 step between rows
493*c0909341SAndroid Build Coastguard Worker    vwmul.vx v28, v30, t6  # v28 = 32-bit per-column offset x * step
494*c0909341SAndroid Build Coastguard Worker2:
495*c0909341SAndroid Build Coastguard Worker    addi t1, t1, -1
496*c0909341SAndroid Build Coastguard Worker
497*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, ma  # index arithmetic in 32 bits
498*c0909341SAndroid Build Coastguard Worker    vadd.vx v4, v28, a6  # position of each column in this row
499*c0909341SAndroid Build Coastguard Worker    add a6, a6, t4  # position for the next row
500*c0909341SAndroid Build Coastguard Worker    vssra.vi v2, v4, 10  # rounding shift right by 10
501*c0909341SAndroid Build Coastguard Worker    vadd.vx v2, v2, t0  # + 64 = filter index
502*c0909341SAndroid Build Coastguard Worker    vsll.vi v24, v2, 3  # * 8 = byte offset of the filter in the table
503*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
504*c0909341SAndroid Build Coastguard Worker
505*c0909341SAndroid Build Coastguard Worker    vluxseg8ei32.v v2, (t2), v24  # tap k of each column filter goes to v(2+k)
506*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
507*c0909341SAndroid Build Coastguard Worker
508*c0909341SAndroid Build Coastguard Worker.irp i, 2, 3, 4, 5, 6, 7, 8, 9
509*c0909341SAndroid Build Coastguard Worker    vle16.v v10, (t5)  # intermediate row for this tap
510*c0909341SAndroid Build Coastguard Worker    addi t5, t5, 16
511*c0909341SAndroid Build Coastguard Worker
512*c0909341SAndroid Build Coastguard Worker    vsext.vf2 v14, v\i
513*c0909341SAndroid Build Coastguard Worker
514*c0909341SAndroid Build Coastguard Worker.if \i == 2
515*c0909341SAndroid Build Coastguard Worker    vwmul.vv v12, v14, v10
516*c0909341SAndroid Build Coastguard Worker.else
517*c0909341SAndroid Build Coastguard Worker    vwmacc.vv v12, v14, v10
518*c0909341SAndroid Build Coastguard Worker.endif
519*c0909341SAndroid Build Coastguard Worker
520*c0909341SAndroid Build Coastguard Worker.endr
521*c0909341SAndroid Build Coastguard Worker    addi t5, t5, -16*7  # eight rows were read; net advance is one row
522*c0909341SAndroid Build Coastguard Worker    vnclip.wi v10, v12, 7  # rounding shift right by 7, saturate to 16 bits
523*c0909341SAndroid Build Coastguard Worker
524*c0909341SAndroid Build Coastguard Worker    vse16.v v10, (a0)  # one output row of eight 16-bit values
525*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0  # a0 += 2 * a1: a1 counts 16-bit elements
526*c0909341SAndroid Build Coastguard Worker
527*c0909341SAndroid Build Coastguard Worker    bnez t1, 2b
528*c0909341SAndroid Build Coastguard Worker
529*c0909341SAndroid Build Coastguard Worker    addi sp, sp, 2*15*8  # release the intermediate buffer
530*c0909341SAndroid Build Coastguard Worker
531*c0909341SAndroid Build Coastguard Worker    ret
532*c0909341SAndroid Build Coastguard Workerendfunc
533