xref: /aosp_15_r20/external/libdav1d/src/riscv/64/ipred16.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
/******************************************************************************
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2024, Bogdan Gligorijevic
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/riscv/asm.S"
29*c0909341SAndroid Build Coastguard Worker
/*
 * dc_gen_16bpc_rvv: DC value computed from both the top row and the left
 * column of 16-bit pixels.
 *
 * The result is sum(top) + sum(left) + ((width + height) >> 1), shifted
 * right by ctz(width + height). When width != height the shifted value is
 * additionally multiplied by 0xAAAB or 0x6667 and shifted right by 17.
 *
 * Internal helper with a non-standard calling convention (hence the
 * .variant_cc marker): it is entered with "jal t0, ..." and returns with
 * "jr t0".
 *
 * In:   a0 = topleft pointer; the top row starts at a0 + 2 and the left
 *            column is the a2 pixels immediately before a0
 *       a1 = width, a2 = height
 *       t0 = return address
 * Out:  a0 = dc
 * Uses: t1-t3, t5, v0-v11, v16-v25, vl/vtype
 */
function dc_gen_16bpc_rvv, export=1, ext="v,zba,zbb"
    .variant_cc dav1d_dc_gen_16bpc_rvv
    add t1, a1, a2
    srli t5, t1, 1 # rounding bias = (width + height) >> 1
    mv t1, a1 # top pixels still to add
    addi t2, a0, 2 # first pixel of the top row
    vsetvli zero, t1, e32, m8, ta, ma
    vmv.v.x v0, zero # 32-bit accumulators for the top row
1:
    vsetvli t3, t1, e16, m4, tu, ma # tu: accumulators past vl stay intact
    vle16.v v8, (t2)
    vwaddu.wv v0, v0, v8 # widen 16-bit pixels into the 32-bit sums
    sub t1, t1, t3

    sh1add t2, t3, t2 # advance by t3 pixels (2 bytes each)
    bnez t1, 1b

    mv t1, a2 # left pixels still to add
    mv t2, a0 # walks downwards in memory from topleft
    vsetvli zero, t1, e32, m8, ta, ma
    vmv.v.x v16, zero # 32-bit accumulators for the left column
2:
    vsetvli t3, t1, e16, m4, tu, ma
    sub t1, t1, t3
    slli t3, t3, 1 # chunk size in bytes
    sub t2, t2, t3 # step back before loading the chunk
    vle16.v v8, (t2)
    vwaddu.wv v16, v16, v8

    bnez t1, 2b

    vsetvli zero, a1, e32, m8, ta, ma
    vmv.s.x v24, t5 # seed the top reduction with the rounding bias
    vmv.s.x v25, zero # the left reduction starts from zero
    vredsum.vs v8, v0, v24
    vsetvli zero, a2, e32, m8, ta, ma
    vredsum.vs v0, v16, v25
    vmv.x.s t5, v8
    vmv.x.s t1, v0
    add t5, t5, t1 # bias + sum(top) + sum(left)

    add t1, a1, a2
    ctz t1, t1 # shift amount: trailing zero bits of width + height

    srl a0, t5, t1

    beq a1, a2, 5f # square block: the shift alone is the final result
    slli t1, a1, 1
    sltu t2, t1, a2 # 2 * width < height
    slli t3, a2, 1
    sltu t1, t3, a1 # 2 * height < width
    or t1, t1, t2
    bnez t1, 3f # one side is more than twice the other

    li t1, 0xAAAB # with the shift by 17 below this is about 1/3
    j 4f
3:
    li t1, 0x6667 # with the shift by 17 below this is about 1/5
4:
    mul a0, a0, t1
    li t1, 17
    srl a0, a0, t1
5:
    jr t0
endfunc
95*c0909341SAndroid Build Coastguard Worker
/*
 * dc_gen_top_16bpc_rvv: accumulates the top row of 16-bit pixels, then
 * tail-jumps to dc_gen_sum_up_16bpc_rvv, which reduces the accumulators,
 * adds the rounding bias from t5 and shifts.
 *
 * Internal helper with a non-standard calling convention (.variant_cc):
 * entered with "jal t0, ...", the return happens in the sum_up helper.
 *
 * In:   a0 = topleft pointer (the top row starts at a0 + 2)
 *       a1 = width
 *       t0 = return address
 * Out:  a0 = dc (produced by dc_gen_sum_up_16bpc_rvv)
 * Uses: t1, t3, t5, v0-v1, v4, vl/vtype, plus whatever sum_up uses
 */
function dc_gen_top_16bpc_rvv, export=1, ext="v,zba,zbb"
    .variant_cc dav1d_dc_gen_top_16bpc_rvv
    mv t1, a1 # pixels still to add
    srli t5, a1, 1 # rounding bias = width >> 1
    addi a0, a0, 2 # first pixel of the top row
    vsetvli zero, t1, e32, m2, ta, ma
    vmv.v.x v0, zero # 32-bit accumulators
1:
    vsetvli t3, t1, e16, m1, tu, ma # tu: accumulators past vl stay intact
    vle16.v v4, (a0)
    vwaddu.wv v0, v0, v4

    sh1add a0, t3, a0 # advance by t3 pixels (2 bytes each)
    sub t1, t1, t3
    bnez t1, 1b

    j dc_gen_sum_up_16bpc_rvv
endfunc
114*c0909341SAndroid Build Coastguard Worker
/*
 * dc_gen_left_16bpc_rvv: accumulates the left column of 16-bit pixels, then
 * tail-jumps to dc_gen_sum_up_16bpc_rvv, which reduces the accumulators,
 * adds the rounding bias from t5 and shifts.
 *
 * Internal helper with a non-standard calling convention (.variant_cc):
 * entered with "jal t0, ...", the return happens in the sum_up helper.
 *
 * In:   a0 = topleft pointer (the left column is the a1 pixels immediately
 *            before a0)
 *       a1 = number of left pixels (the callers in this file pass height)
 *       t0 = return address
 * Out:  a0 = dc (produced by dc_gen_sum_up_16bpc_rvv)
 * Uses: t1, t3, t5, v0-v1, v4, vl/vtype, plus whatever sum_up uses
 */
function dc_gen_left_16bpc_rvv, export=1, ext="v,zba,zbb"
    .variant_cc dav1d_dc_gen_left_16bpc_rvv
    mv t1, a1 # pixels still to add
    srli t5, a1, 1 # rounding bias = count >> 1
    vsetvli zero, t1, e32, m2, ta, ma
    vmv.v.x v0, zero # 32-bit accumulators
1:
    vsetvli t3, t1, e16, m1, tu, ma # tu: accumulators past vl stay intact
    sub t1, t1, t3
    slli t3, t3, 1 # chunk size in bytes
    sub a0, a0, t3 # step back before loading the chunk
    vle16.v v4, (a0)
    vwaddu.wv v0, v0, v4

    bnez t1, 1b

    j dc_gen_sum_up_16bpc_rvv
endfunc
133*c0909341SAndroid Build Coastguard Worker
/*
 * dc_gen_sum_up_16bpc_rvv: common tail of dc_gen_top and dc_gen_left.
 * Reduces the 32-bit accumulators in v0, adds the bias in t5 and shifts the
 * total right by ctz(a1).
 *
 * Non-standard calling convention (.variant_cc): returns with "jr t0".
 *
 * In:   v0 = 32-bit partial sums (e32, m2 register group)
 *       a1 = number of pixels that were summed
 *       t5 = rounding bias
 *       t0 = return address
 * Out:  a0 = (bias + sum) >> ctz(a1)
 * Uses: t1, t5, v4, v8, vl/vtype
 */
function dc_gen_sum_up_16bpc_rvv, export=1, ext="v,zba,zbb"
    .variant_cc dav1d_dc_gen_sum_up_16bpc_rvv

    vsetvli zero, a1, e32, m2, ta, ma
    vmv.s.x v4, t5 # seed the reduction with the rounding bias
    vredsum.vs v8, v0, v4
    vmv.x.s t5, v8

    ctz t1, a1 # shift amount: trailing zero bits of the count

    srl a0, t5, t1
    jr t0
endfunc
147*c0909341SAndroid Build Coastguard Worker
/*
 * cfl_pred_16bpc_rvv: per pixel,
 *     diff = ac * a6
 *     dst  = clamp(a4 + sign(diff) * ((abs(diff) + 32) >> 6), 0, a7)
 *
 * In:   a0 = dst, a1 = dst stride in bytes
 *       a2 = width, a3 = height
 *       a4 = dc
 *       a5 = 16-bit ac values, read back-to-back across rows (the pointer
 *            is never rewound, so there is no row stride)
 *       a6 = multiplier applied to every ac value (presumably the CfL alpha)
 *       a7 = upper clamp (the 128 variant in this file treats it as
 *            bitdepth_max)
 * Uses: t0, t2-t4, v0-v23, vl/vtype, vxrm
 */
function cfl_pred_16bpc_rvv, export=1, ext="v,zba"
    csrw vxrm, zero # vxrm = 0: vssra rounds to nearest, ties up
1:
    li t2, 0 # x = 0
    mv t3, a2 # pixels left in this row
2:
    vsetvli t0, t3, e16, m2, ta, ma
    sh1add t4, t2, a0 # &dst[x]
    vle16.v v0, (a5)
    sh1add a5, t0, a5

    vwmul.vx v4, v0, a6 # diff = ac * a6 (signed, widened to 32 bits)
    vsetvli zero, zero, e32, m4, ta, mu # mu: the masked vneg keeps other lanes
    vneg.v v8, v4
    vmslt.vx v0, v4, x0 # mask = lanes where diff < 0
    vmax.vv v12, v8, v4 # abs(diff)
    vssra.vi v16, v12, 6 # (abs(diff) + 32) >> 6
    vneg.v v16, v16, v0.t # restore the sign
    vadd.vx v20, v16, a4 # + dc
    vmax.vx v0, v20, zero # clamp below at 0
    vmin.vx v0, v0, a7 # clamp above at a7
    vsetvli zero, zero, e16, m2, ta, ma
    vnclipu.wi v4, v0, 0 # narrow the clamped value to 16 bits
    vse16.v v4, (t4)
    add t2, t0, t2
    sub t3, t3, t0
    bnez t3, 2b
    addi a3, a3, -1
    add a0, a0, a1 # next row

    bnez a3, 1b
    ret
endfunc
181*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_cfl_16bpc_rvv: computes dc from both edges with dc_gen_16bpc_rvv,
 * then tail-jumps to cfl_pred_16bpc_rvv.
 *
 * In:   a0 = dst, a1 = stride, a2 = topleft, a3 = width, a4 = height
 *       a5-a7 are passed through untouched to cfl_pred_16bpc_rvv
 * Uses: t0 (link register for the helper), t4, t6, plus everything the
 *       helper and cfl_pred_16bpc_rvv use
 */
function ipred_cfl_16bpc_rvv, export=1, ext=v
    mv t6, a0 # dst
    mv a0, a2 # topleft
    mv t4, a1 # stride
    mv a1, a3 # width
    mv a2, a4 # height
    jal t0, dc_gen_16bpc_rvv
    mv a2, a3 # width
    mv a3, a4 # height
    mv a4, a0 # dc returned by the helper
    mv a0, t6 # dst
    mv a1, t4 # stride
    j cfl_pred_16bpc_rvv
endfunc
196*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_cfl_128_16bpc_rvv: CfL prediction with a fixed dc that needs no edge
 * pixels: dc = (bitdepth_max + 1) >> 1. Only rearranges registers and
 * tail-jumps to cfl_pred_16bpc_rvv.
 *
 * In:   a0 = dst, a1 = stride, a3 = width, a4 = height, a7 = bitdepth_max
 *       a2 is overwritten without being read; a5 and a6 are passed through
 */
function ipred_cfl_128_16bpc_rvv, export=1, ext="v,zba"
    mv a2, a3 # width
    mv a3, a4 # height
    addi a4, a7, 1
    srli a4, a4, 1 # dc = (bitdepth_max + 1) >> 1

    j cfl_pred_16bpc_rvv
endfunc
206*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_cfl_top_16bpc_rvv: computes dc from the top row only with
 * dc_gen_top_16bpc_rvv, then tail-jumps to cfl_pred_16bpc_rvv.
 *
 * In:   a0 = dst, a1 = stride, a2 = topleft, a3 = width, a4 = height
 *       a5-a7 are passed through untouched to cfl_pred_16bpc_rvv
 * Uses: t0 (link register for the helper), t4, t6, plus everything the
 *       helper and cfl_pred_16bpc_rvv use
 */
function ipred_cfl_top_16bpc_rvv, export=1, ext=v
    mv t6, a0 # dst
    mv a0, a2 # topleft
    mv t4, a1 # stride
    mv a1, a3 # width
    jal t0, dc_gen_top_16bpc_rvv
    mv a3, a4 # height
    mv a4, a0 # dc returned by the helper
    mv a0, t6 # dst
    mv a2, a1 # width (a1 is left unchanged by the helper)
    mv a1, t4 # stride
    j cfl_pred_16bpc_rvv
endfunc
220*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_cfl_left_16bpc_rvv: computes dc from the left column only with
 * dc_gen_left_16bpc_rvv, then tail-jumps to cfl_pred_16bpc_rvv.
 *
 * In:   a0 = dst, a1 = stride, a2 = topleft, a3 = width, a4 = height
 *       a5-a7 are passed through untouched to cfl_pred_16bpc_rvv
 * Uses: t0 (link register for the helper), t4, t6, plus everything the
 *       helper and cfl_pred_16bpc_rvv use
 */
function ipred_cfl_left_16bpc_rvv, export=1, ext=v
    mv t6, a0 # dst
    mv a0, a2 # topleft
    mv t4, a1 # stride
    mv a1, a4 # height = number of left pixels for the helper
    mv a2, a3 # width, already in place for cfl_pred
    jal t0, dc_gen_left_16bpc_rvv
    mv a3, a4 # height
    mv a4, a0 # dc returned by the helper
    mv a1, t4 # stride
    mv a0, t6 # dst
    j cfl_pred_16bpc_rvv
endfunc
234*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_paeth_16bpc_rvv: Paeth predictor for 16-bit pixels.
 *
 * For every pixel, base = top + left - topleft and
 *     ldiff = abs(base - left), tdiff = abs(base - top),
 *     tldiff = abs(base - topleft)
 * The output is left if ldiff <= tdiff and ldiff <= tldiff, otherwise top if
 * tdiff <= tldiff, otherwise topleft.
 *
 * In:   a0 = dst, a1 = dst stride in bytes
 *       a2 = topleft pointer (top row at a2 + 2, left pixels at a2 - 2 and
 *            downwards in memory, one per row)
 *       a3 = width, a4 = height
 * Uses: t1-t3, t5, t6, a6, v0, v2-v27, vl/vtype, vxrm
 */
function ipred_paeth_16bpc_rvv, export=1, ext="v,zba"
    csrw vxrm, zero
    lhu t1, (a2) # topleft pixel
    addi a6, a2, -2 # left pixel of the first row
    addi a2, a2, 2 # start of the top row
1:
    lhu t2, (a6) # left pixel of this row
    mv t3, a3 # pixels left in this row
2:
    sub t5, a3, t3 # x = pixels already done in this row
    sh1add t5, t5, a2 # &top[x]
    vsetvli t6, t3, e16, m2, ta, ma
    vle16.v v2, (t5) # top
    vwaddu.vx v4, v2, t2 # top + left, widened to 32 bits

    vsetvli zero, zero, e32, m4, ta, mu # mu: the masked vneg keeps other lanes
    vsub.vx v8, v4, t1 # base = top + left - topleft
    vzext.vf2 v24, v2 # top as 32-bit lanes
    vsub.vx v12, v8, t1
    vmslt.vx v0, v12, zero
    vneg.v v12, v12, v0.t # tldiff = abs(base - topleft)
    vsub.vx v16, v8, t2
    vmslt.vx v0, v16, zero
    vneg.v v16, v16, v0.t # ldiff = abs(base - left)
    vsub.vv v20, v8, v24
    vmslt.vx v0, v20, zero
    vneg.v v20, v20, v0.t # tdiff = abs(base - top)

    sub t5, a3, t3 # x again, this time for the dst address
    vmsleu.vv v4, v16, v20 # ldiff <= tdiff
    vmsleu.vv v5, v16, v12 # ldiff <= tldiff
    vmsgtu.vv v0, v20, v12 # tdiff > tldiff
    vmand.mm v6, v4, v5 # left wins

    vsetvli zero, zero, e16, m2, ta, ma
    vmerge.vxm v8, v2, t1, v0 # topleft where tdiff > tldiff, else top
    vmmv.m v0, v6
    sh1add t5, t5, a0 # &dst[x]
    sub t3, t3, t6
    vmerge.vxm v4, v8, t2, v0 # left where it wins, else the previous pick

    vse16.v v4, (t5)

    bnez t3, 2b

    addi a4, a4, -1
    addi a6, a6, -2 # left pixel of the next row
    add a0, a0, a1 # next dst row
    bnez a4, 1b
    ret
endfunc
288*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_smooth_16bpc_rvv: two-dimensional smooth prediction, 16-bit pixels.
 *
 * With wv = vertical weight of row y and wh = horizontal weight of column x
 * (bytes taken from dav1d_sm_weights at offsets height + y and width + x):
 *     dst = (wv * top[x] + (256 - wv) * bottom
 *          + wh * left[y] + (256 - wh) * right + 256) >> 9
 * where right = topleft[width] and bottom = topleft[-height].
 *
 * In:   a0 = dst, a1 = dst stride in bytes
 *       a2 = topleft pointer, a3 = width, a4 = height
 * Uses: t0-t6, a5-a7, v2-v15, vl/vtype, vxrm
 */
function ipred_smooth_16bpc_rvv, export=1, ext="v,zba"
    csrw vxrm, zero # vxrm = 0: vnclipu rounds to nearest, ties up
    la t0, dav1d_sm_weights
    add t1, t0, a3 # horizontal weights = table + width
    sh1add t2, a3, a2
    slli t3, a4, 1
    add t0, t0, a4 # vertical weights = table + height
    lhu t2, (t2) # right = topleft[width]
    sub t3, a2, t3
    addi a6, a2, -2 # left pixel of the first row
    addi a2, a2, 2 # start of the top row
    lhu t3, (t3) # bottom = topleft[-height]
1:
    mv t6, a3 # pixels left in this row

    lhu a7, (a6) # left pixel of this row
    lbu t4, (t0) # wv for this row
2:
    li a5, 256
    vsetvli t5, t6, e16, m2, ta, ma
    vle8.v v2, (t1) # wh for this chunk
    add t1, t1, t5
    vle16.v v4, (a2) # top
    sh1add a2, t5, a2
    sub a5, a5, t4 # 256 - wv

    vwmul.vx v8, v4, t4 # wv * top, widened to 32 bits
    mul a5, a5, t3 # (256 - wv) * bottom

    vsetvli zero, zero, e32, m4, ta, ma
    vadd.vx v4, v8, a5 # vertical term

    li a5, 256
    vzext.vf4 v12, v2 # wh as 32-bit lanes
    vmul.vx v8, v12, a7 # wh * left

    vrsub.vx v12, v12, a5 # 256 - wh
    vmacc.vx v8, t2, v12 # + (256 - wh) * right
    vadd.vv v12, v4, v8 # vertical + horizontal

    sub a5, a3, t6 # x = pixels already done in this row
    sub t6, t6, t5
    sh1add a5, a5, a0 # &dst[x]
    vsetvli zero, zero, e16, m2, ta, ma
    vnclipu.wi v2, v12, 9 # (sum + 256) >> 9, narrowed to 16 bits
    vse16.v v2, (a5)

    bnez t6, 2b

    sub t1, t1, a3 # rewind the horizontal weights
    slli t6, a3, 1
    add a0, a0, a1 # next dst row
    sub a2, a2, t6 # rewind the top row pointer
    addi a4, a4, -1
    addi t0, t0, 1 # next vertical weight
    addi a6, a6, -2 # left pixel of the next row
    bnez a4, 1b

    ret
endfunc
350*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_smooth_v_16bpc_rvv: vertical smooth prediction, 16-bit pixels.
 *
 * With wv = byte at dav1d_sm_weights[height + y]:
 *     dst = (wv * top[x] + (256 - wv) * bottom + 128) >> 8
 * where bottom = topleft[-height].
 *
 * In:   a0 = dst, a1 = dst stride in bytes
 *       a2 = topleft pointer, a3 = width, a4 = height
 * Uses: t0, t3-t6, a5, v2-v11, vl/vtype, vxrm
 */
function ipred_smooth_v_16bpc_rvv, export=1, ext="v,zba"
    csrw vxrm, zero # vxrm = 0: vnclipu rounds to nearest, ties up
    la t0, dav1d_sm_weights
    slli t3, a4, 1
    add t0, t0, a4 # vertical weights = table + height
    sub t3, a2, t3
    addi a2, a2, 2 # start of the top row
    lhu t3, (t3) # bottom = topleft[-height]
1:
    mv t6, a3 # pixels left in this row

    lbu t4, (t0) # wv for this row
2:
    li a5, 256
    vsetvli t5, t6, e16, m2, ta, ma
    vle16.v v4, (a2) # top
    sh1add a2, t5, a2
    sub a5, a5, t4 # 256 - wv

    vwmul.vx v8, v4, t4 # wv * top, widened to 32 bits
    mul a5, a5, t3 # (256 - wv) * bottom

    vsetvli zero, zero, e32, m4, ta, ma
    vadd.vx v4, v8, a5

    sub a5, a3, t6 # x = pixels already done in this row
    sub t6, t6, t5
    sh1add a5, a5, a0 # &dst[x]
    vsetvli zero, zero, e16, m2, ta, ma
    vnclipu.wi v2, v4, 8 # (sum + 128) >> 8, narrowed to 16 bits
    vse16.v v2, (a5)

    bnez t6, 2b

    slli t6, a3, 1
    add a0, a0, a1 # next dst row
    sub a2, a2, t6 # rewind the top row pointer
    addi a4, a4, -1
    addi t0, t0, 1 # next vertical weight
    bnez a4, 1b

    ret
endfunc
395*c0909341SAndroid Build Coastguard Worker
/*
 * ipred_smooth_h_16bpc_rvv: horizontal smooth prediction, 16-bit pixels.
 *
 * With wh = byte at dav1d_sm_weights[width + x]:
 *     dst = (wh * left[y] + (256 - wh) * right + 128) >> 8
 * where right = topleft[width].
 *
 * In:   a0 = dst, a1 = dst stride in bytes
 *       a2 = topleft pointer, a3 = width, a4 = height
 * Uses: t0-t2, t5, t6, a5-a7, v2, v3, v8-v15, vl/vtype, vxrm
 */
function ipred_smooth_h_16bpc_rvv, export=1, ext="v,zba"
    csrw vxrm, zero # vxrm = 0: vnclipu rounds to nearest, ties up
    la t0, dav1d_sm_weights
    add t1, t0, a3 # horizontal weights = table + width
    sh1add t2, a3, a2
    lhu t2, (t2) # right = topleft[width]
    addi a6, a2, -2 # left pixel of the first row
1:
    mv t6, a3 # pixels left in this row

    lhu a7, (a6) # left pixel of this row
2:
    vsetvli t5, t6, e16, m2, ta, ma
    vle8.v v2, (t1) # wh for this chunk
    add t1, t1, t5

    li a5, 256
    vsetvli zero, zero, e32, m4, ta, ma
    vzext.vf4 v12, v2 # wh as 32-bit lanes
    vmul.vx v8, v12, a7 # wh * left

    vrsub.vx v12, v12, a5 # 256 - wh
    vmacc.vx v8, t2, v12 # + (256 - wh) * right

    sub a5, a3, t6 # x = pixels already done in this row
    sub t6, t6, t5
    sh1add a5, a5, a0 # &dst[x]
    vsetvli zero, zero, e16, m2, ta, ma
    vnclipu.wi v2, v8, 8 # (sum + 128) >> 8, narrowed to 16 bits
    vse16.v v2, (a5)

    bnez t6, 2b

    sub t1, t1, a3 # rewind the horizontal weights
    add a0, a0, a1 # next dst row
    addi a4, a4, -1
    addi a6, a6, -2 # left pixel of the next row
    bnez a4, 1b

    ret
endfunc
437*c0909341SAndroid Build Coastguard Worker
/*
 * pal_pred_16bpc_rvv: expands packed palette indices into 16-bit pixels.
 *
 * Every index byte holds two indices: the low bits (masked with 7) select
 * the pixel at an even x, the high nibble selects the pixel at the
 * following odd x.
 *
 * In:   a0 = dst, a1 = dst stride in bytes
 *       a2 = palette, eight 16-bit entries
 *       a3 = packed indices, read back-to-back across rows
 *       a4 = width, a5 = height
 * Uses: t0-t6, v0-v2, v4, v6, v8, v10, v30, vl/vtype, vxrm
 *
 * NOTE(review): the high nibble is shifted down but not masked, so this
 * presumably relies on the producer never storing a value above 7 there;
 * confirm against the caller.
 */
function pal_pred_16bpc_rvv, export=1, ext="v,zba"
    csrw vxrm, zero
    vsetivli t5, 8, e16, m1, ta, ma
    vle16.v v30, (a2) # the eight palette entries
    li t0, 4 # store stride: every other 16-bit pixel
    srli t1, a4, 1 # index bytes per row
    li t2, 1 # multiplier used only to widen 8 to 16 bits
1:
    mv t4, a4 # pixels left in this row
2:
    vsetvli t5, t1, e8, mf2, ta, ma
    vle8.v v0, (a3)
    add a3, a3, t5
    vand.vi v1, v0, 7 # index of the even pixel
    sub t6, a4, t4 # x = pixels already done in this row
    vsrl.vi v2, v0, 4 # index of the odd pixel
    vwmul.vx v4, v1, t2 # widen to 16-bit gather indices
    vwmul.vx v6, v2, t2
    vsetvli zero, zero, e16, m1, ta, ma
    sh1add t6, t6, a0 # &dst[x], even pixels
    vrgather.vv v8, v30, v4
    addi t3, t6, 2 # &dst[x + 1], odd pixels
    vrgather.vv v10, v30, v6
    slli t5, t5, 1 # pixels produced = 2 * bytes consumed
    vsse16.v v8, (t6), t0
    vsse16.v v10, (t3), t0

    sub t4, t4, t5
    bnez t4, 2b
    add a0, a0, a1 # next dst row
    addi a5, a5, -1
    bnez a5, 1b
    ret
endfunc
472