xref: /aosp_15_r20/external/libdav1d/src/riscv/64/cdef.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/******************************************************************************
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2018, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2024, Bogdan Gligorijevic
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker *****************************************************************************/
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/riscv/asm.S"
29*c0909341SAndroid Build Coastguard Worker
/*
 * constrain_vectors: apply the CDEF "constrain" step to two vectors of
 * signed pixel differences at once. For every element d this computes
 *
 *     d = sign(d) * min(|d|, max(0, strength - (|d| >> shift)))
 *
 * Arguments:
 *   vec1, vec2         - vector scratch registers; on exit they hold
 *                        max(0, strength - (|d| >> shift)) for each input
 *   vec_sub            - not referenced anywhere in the macro body
 *   strength, shift    - scalar (x) registers
 *   vec_tmp1, vec_tmp2 - in/out: differences on entry, constrained
 *                        differences on exit
 *
 * Clobbers v0 (mask) and v1 (saved copy of the first sign mask).
 *
 * The masked vneg.v instructions only produce |d| if inactive elements are
 * left untouched, so the caller has to select the mask-undisturbed policy
 * first; the call sites visible in this file use "e32, m2, ta, mu".
 * The two dependency chains are interleaved, presumably to hide latency.
 */
30*c0909341SAndroid Build Coastguard Worker.macro constrain_vectors vec1, vec2, vec_sub, strength, shift, vec_tmp1, vec_tmp2
31*c0909341SAndroid Build Coastguard Worker    vmslt.vx v0, \vec_tmp1, zero # v0 = elements of diff 1 that are negative
32*c0909341SAndroid Build Coastguard Worker    vneg.v \vec_tmp1, \vec_tmp1, v0.t # diff 1 = abs(diff 1)
33*c0909341SAndroid Build Coastguard Worker    vmmv.m v1, v0 # keep sign mask of diff 1, v0 is reused below
34*c0909341SAndroid Build Coastguard Worker
35*c0909341SAndroid Build Coastguard Worker    vmslt.vx v0, \vec_tmp2, zero # v0 = elements of diff 2 that are negative
36*c0909341SAndroid Build Coastguard Worker    vneg.v \vec_tmp2, \vec_tmp2, v0.t # diff 2 = abs(diff 2)
37*c0909341SAndroid Build Coastguard Worker
38*c0909341SAndroid Build Coastguard Worker    vsra.vx \vec1, \vec_tmp1, \shift # abs(diff) >> shift
39*c0909341SAndroid Build Coastguard Worker    vsra.vx \vec2, \vec_tmp2, \shift
40*c0909341SAndroid Build Coastguard Worker
41*c0909341SAndroid Build Coastguard Worker    vrsub.vx \vec1, \vec1, \strength # strength - (abs(diff) >> shift)
42*c0909341SAndroid Build Coastguard Worker    vrsub.vx \vec2, \vec2, \strength
43*c0909341SAndroid Build Coastguard Worker
44*c0909341SAndroid Build Coastguard Worker    vmax.vx \vec1, \vec1, zero # clamp the limit at zero
45*c0909341SAndroid Build Coastguard Worker    vmax.vx \vec2, \vec2, zero
46*c0909341SAndroid Build Coastguard Worker
47*c0909341SAndroid Build Coastguard Worker    vmin.vv \vec_tmp1, \vec1, \vec_tmp1 # min(limit, abs(diff))
48*c0909341SAndroid Build Coastguard Worker    vmin.vv \vec_tmp2, \vec2, \vec_tmp2
49*c0909341SAndroid Build Coastguard Worker
50*c0909341SAndroid Build Coastguard Worker    vneg.v \vec_tmp2, \vec_tmp2, v0.t # restore sign of diff 2 (v0 still holds its mask)
51*c0909341SAndroid Build Coastguard Worker
52*c0909341SAndroid Build Coastguard Worker    vmmv.m v0, v1 # bring back sign mask of diff 1
53*c0909341SAndroid Build Coastguard Worker    vneg.v \vec_tmp1, \vec_tmp1, v0.t # restore sign of diff 1
54*c0909341SAndroid Build Coastguard Worker.endm
55*c0909341SAndroid Build Coastguard Worker
/*
 * padding_fn w, h: build the 16-bit working copy of a w x h block of 8-bit
 * pixels plus a 2-pixel border on every side.
 *
 * Border parts whose edge bit is clear are filled with INT16_MIN; parts
 * whose edge bit is set are copied (zero-extended) from the neighbouring
 * pixels.
 *
 * Register contract, as established by the code below and by the caller
 * visible in this file:
 *   a0 - pointer to element (0, 0) of the 16-bit buffer ("tmp")
 *   a1 - row stride of that buffer in 16-bit elements
 *   a2 - pointer to the 8-bit source block
 *   a3 - source row stride in bytes
 *   a4 - left border pixels, 2 bytes per row
 *   a5 - top border rows, addressed with the stride in a3
 *   a6 - bottom border rows, addressed with the stride in a3
 *   a7 - edge flags: 1 = left, 2 = right, 4 = top, 8 = bottom available
 *
 * On exit a0 and a2 hold their entry values again. a4, a5 and a6 are not
 * restored. t0-t6, the v0 and v2 register groups and vl/vtype are
 * clobbered.
 *
 * Internal bookkeeping: t2 = y_start, t3 = y_end, t0 = x_start,
 * t1 = x_end (see the individual sections for how they are derived).
 */
56*c0909341SAndroid Build Coastguard Worker.macro padding_fn w, h
57*c0909341SAndroid Build Coastguard Worker    li t5, -32768 # INT16_MIN
58*c0909341SAndroid Build Coastguard Worker
59*c0909341SAndroid Build Coastguard Worker    andi t4, a7, 4 # top edge available?
60*c0909341SAndroid Build Coastguard Worker    li t2, -2 # y_start
61*c0909341SAndroid Build Coastguard Worker
/* One vector covers a full padded row of w + 4 elements. */
62*c0909341SAndroid Build Coastguard Worker.if \w == 4
63*c0909341SAndroid Build Coastguard Worker    vsetivli zero, \w + 4, e16, m1, ta, ma
64*c0909341SAndroid Build Coastguard Worker.else
65*c0909341SAndroid Build Coastguard Worker    vsetivli zero, \w + 4, e16, m2, ta, ma
66*c0909341SAndroid Build Coastguard Worker.endif
67*c0909341SAndroid Build Coastguard Worker    vmv.v.x v0, t5 # v0 = fill pattern, reused by every fill below
68*c0909341SAndroid Build Coastguard Worker    bnez t4, L(top_done_\w\()x\h)
69*c0909341SAndroid Build Coastguard Worker
/* No top edge: fill rows -2 and -1, starting at column -2. */
70*c0909341SAndroid Build Coastguard Worker    slli t5, a1, 1
71*c0909341SAndroid Build Coastguard Worker    addi t5, t5, 2
72*c0909341SAndroid Build Coastguard Worker    slli t5, t5, 1
73*c0909341SAndroid Build Coastguard Worker    sub t5, a0, t5 # t5 = tmp - 2 * tmp_stride - 2
74*c0909341SAndroid Build Coastguard Worker
75*c0909341SAndroid Build Coastguard Worker    sh1add t4, a1, t5 # t4 = one row below t5
76*c0909341SAndroid Build Coastguard Worker    vse16.v v0, (t5)
77*c0909341SAndroid Build Coastguard Worker    vse16.v v0, (t4)
78*c0909341SAndroid Build Coastguard Worker    li t2, 0 # y_start = 0
79*c0909341SAndroid Build Coastguard Worker
80*c0909341SAndroid Build Coastguard WorkerL(top_done_\w\()x\h):
81*c0909341SAndroid Build Coastguard Worker    andi t4, a7, 8 # bottom edge available?
82*c0909341SAndroid Build Coastguard Worker    li t3, 2 + \h # y_end
83*c0909341SAndroid Build Coastguard Worker    bnez t4, L(bottom_done_\w\()x\h)
84*c0909341SAndroid Build Coastguard Worker
/* No bottom edge: fill rows h and h + 1, starting at column -2. */
85*c0909341SAndroid Build Coastguard Worker    li t5, \h
86*c0909341SAndroid Build Coastguard Worker    mul t5, a1, t5
87*c0909341SAndroid Build Coastguard Worker    addi t5, t5, -2
88*c0909341SAndroid Build Coastguard Worker    sh1add t5, t5, a0 # t5 = tmp + h * tmp_stride - 2
89*c0909341SAndroid Build Coastguard Worker
90*c0909341SAndroid Build Coastguard Worker    sh1add t4, a1, t5
91*c0909341SAndroid Build Coastguard Worker    vse16.v v0, (t5)
92*c0909341SAndroid Build Coastguard Worker    vse16.v v0, (t4)
93*c0909341SAndroid Build Coastguard Worker    addi t3, t3, -2 # y_end = h
94*c0909341SAndroid Build Coastguard Worker
95*c0909341SAndroid Build Coastguard WorkerL(bottom_done_\w\()x\h):
96*c0909341SAndroid Build Coastguard Worker    andi t4, a7, 1 # left edge available?
97*c0909341SAndroid Build Coastguard Worker    li t0, -2 # x_start
98*c0909341SAndroid Build Coastguard Worker
/* The side fills write 2 elements per row; v0 still holds the fill value. */
99*c0909341SAndroid Build Coastguard Worker.if \w == 4
100*c0909341SAndroid Build Coastguard Worker    vsetivli zero, 2, e16, m1, ta, ma
101*c0909341SAndroid Build Coastguard Worker.else
102*c0909341SAndroid Build Coastguard Worker    vsetivli zero, 2, e16, m2, ta, ma
103*c0909341SAndroid Build Coastguard Worker.endif
104*c0909341SAndroid Build Coastguard Worker
105*c0909341SAndroid Build Coastguard Worker    bnez t4, L(left_done_\w\()x\h)
106*c0909341SAndroid Build Coastguard Worker
/* No left edge: fill columns -2 and -1 for rows y_start .. y_end - 1. */
107*c0909341SAndroid Build Coastguard Worker    mul t5, a1, t2
108*c0909341SAndroid Build Coastguard Worker    addi t5, t5, -2
109*c0909341SAndroid Build Coastguard Worker    sh1add t5, t5, a0 # t5 = tmp + y_start * tmp_stride - 2
110*c0909341SAndroid Build Coastguard Worker
111*c0909341SAndroid Build Coastguard Worker    sub t0, t3, t2 # row counter; reaches 0, which is also the new x_start
112*c0909341SAndroid Build Coastguard Worker
113*c0909341SAndroid Build Coastguard Worker3:
114*c0909341SAndroid Build Coastguard Worker    vse16.v v0, (t5)
115*c0909341SAndroid Build Coastguard Worker    sh1add t5, a1, t5
116*c0909341SAndroid Build Coastguard Worker    addi t0, t0, -1
117*c0909341SAndroid Build Coastguard Worker    bnez t0, 3b
118*c0909341SAndroid Build Coastguard Worker
119*c0909341SAndroid Build Coastguard WorkerL(left_done_\w\()x\h):
120*c0909341SAndroid Build Coastguard Worker
121*c0909341SAndroid Build Coastguard Worker    andi t4, a7, 2 # right edge available?
122*c0909341SAndroid Build Coastguard Worker    li t1, 2 + \w # x_end
123*c0909341SAndroid Build Coastguard Worker    bnez t4, L(right_done_\w\()x\h)
124*c0909341SAndroid Build Coastguard Worker
/* No right edge: fill columns w and w + 1 for rows y_start .. y_end - 1. */
125*c0909341SAndroid Build Coastguard Worker    mul t5, t2, a1
126*c0909341SAndroid Build Coastguard Worker    addi t5, t5, \w
127*c0909341SAndroid Build Coastguard Worker    sh1add t5, t5, a0 # t5 = tmp + y_start * tmp_stride + w
128*c0909341SAndroid Build Coastguard Worker
129*c0909341SAndroid Build Coastguard Worker    sub t1, t3, t2 # t1 doubles as the row counter here
130*c0909341SAndroid Build Coastguard Worker
131*c0909341SAndroid Build Coastguard Worker4:
132*c0909341SAndroid Build Coastguard Worker    vse16.v v0, (t5)
133*c0909341SAndroid Build Coastguard Worker    sh1add t5, a1, t5
134*c0909341SAndroid Build Coastguard Worker    addi t1, t1, -1
135*c0909341SAndroid Build Coastguard Worker    bnez t1, 4b
136*c0909341SAndroid Build Coastguard Worker
137*c0909341SAndroid Build Coastguard Worker    li t1, \w # x_end = w
138*c0909341SAndroid Build Coastguard Worker
139*c0909341SAndroid Build Coastguard WorkerL(right_done_\w\()x\h):
140*c0909341SAndroid Build Coastguard Worker
/* Top rows: copied only when y_start is -2, i.e. the top edge exists. */
141*c0909341SAndroid Build Coastguard Worker    beqz t2, L(top_skip_\w\()x\h)
142*c0909341SAndroid Build Coastguard Worker
143*c0909341SAndroid Build Coastguard Worker    mul t5, a1, t2
144*c0909341SAndroid Build Coastguard Worker    add t5, t0, t5
145*c0909341SAndroid Build Coastguard Worker    sh1add a0, t5, a0 # tmp += y_start * tmp_stride + x_start
146*c0909341SAndroid Build Coastguard Worker    add a5, a5, t0 # top += x_start
147*c0909341SAndroid Build Coastguard Worker
148*c0909341SAndroid Build Coastguard Worker    sub t5, t1, t0 # x_end - x_start
149*c0909341SAndroid Build Coastguard Worker    slli t6, t0, 1 # x_start in bytes, used to rewind tmp after the loop
150*c0909341SAndroid Build Coastguard Worker.if \w == 4
151*c0909341SAndroid Build Coastguard Worker    vsetvli zero, t5, e16, m1, ta, ma
152*c0909341SAndroid Build Coastguard Worker.else
153*c0909341SAndroid Build Coastguard Worker    vsetvli zero, t5, e16, m2, ta, ma
154*c0909341SAndroid Build Coastguard Worker.endif
155*c0909341SAndroid Build Coastguard Worker
/* Each pass widens one row of bytes to 16 bits; t2 counts up from -2 to 0. */
156*c0909341SAndroid Build Coastguard Worker5:
157*c0909341SAndroid Build Coastguard Worker    vle8.v v0, (a5)
158*c0909341SAndroid Build Coastguard Worker    addi t2, t2, 1
159*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v2, v0
160*c0909341SAndroid Build Coastguard Worker    add a5, a3, a5
161*c0909341SAndroid Build Coastguard Worker    vse16.v v2, (a0)
162*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0
163*c0909341SAndroid Build Coastguard Worker    bnez t2, 5b
164*c0909341SAndroid Build Coastguard Worker
165*c0909341SAndroid Build Coastguard Worker    sub a0, a0, t6 # tmp -= x_start
166*c0909341SAndroid Build Coastguard Worker
167*c0909341SAndroid Build Coastguard WorkerL(top_skip_\w\()x\h):
168*c0909341SAndroid Build Coastguard Worker
/* Left columns: copied only when x_start is -2, i.e. the left edge exists. */
169*c0909341SAndroid Build Coastguard Worker    li a5, \h # a5 is now the row counter
170*c0909341SAndroid Build Coastguard Worker    beqz t0, L(left_skip_\w\()x\h)
171*c0909341SAndroid Build Coastguard Worker
172*c0909341SAndroid Build Coastguard Worker    sh1add a0, t0, a0 # tmp += x_start
173*c0909341SAndroid Build Coastguard Worker
174*c0909341SAndroid Build Coastguard Worker7:
175*c0909341SAndroid Build Coastguard Worker.if \w == 4
176*c0909341SAndroid Build Coastguard Worker    vsetivli zero, 2, e16, m1, ta, ma
177*c0909341SAndroid Build Coastguard Worker.else
178*c0909341SAndroid Build Coastguard Worker    vsetivli zero, 2, e16, m2, ta, ma
179*c0909341SAndroid Build Coastguard Worker.endif
180*c0909341SAndroid Build Coastguard Worker
181*c0909341SAndroid Build Coastguard Worker    vle8.v v0, (a4)
182*c0909341SAndroid Build Coastguard Worker    addi a5, a5, -1
183*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v2, v0
184*c0909341SAndroid Build Coastguard Worker    addi a4, a4, 2 # left advances 2 bytes per row
185*c0909341SAndroid Build Coastguard Worker    vse16.v v2, (a0)
186*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0
187*c0909341SAndroid Build Coastguard Worker    bnez a5, 7b
188*c0909341SAndroid Build Coastguard Worker
189*c0909341SAndroid Build Coastguard Worker    li a5, \h
190*c0909341SAndroid Build Coastguard Worker    mul t5, a1, a5
191*c0909341SAndroid Build Coastguard Worker    add t5, t5, t0
192*c0909341SAndroid Build Coastguard Worker    slli t5, t5, 1
193*c0909341SAndroid Build Coastguard Worker    sub a0, a0, t5 # tmp -= h * tmp_stride + x_start
194*c0909341SAndroid Build Coastguard Worker
195*c0909341SAndroid Build Coastguard WorkerL(left_skip_\w\()x\h):
196*c0909341SAndroid Build Coastguard Worker
/*
 * Block rows: copy columns 0 .. x_end - 1 of each of the h source rows.
 * When the right edge exists, x_end is w + 2, so the two pixels to the
 * right of the block are read from the source row as well.
 */
197*c0909341SAndroid Build Coastguard Worker8:
198*c0909341SAndroid Build Coastguard Worker.if \w == 4
199*c0909341SAndroid Build Coastguard Worker    vsetvli zero, t1, e16, m1, ta, ma
200*c0909341SAndroid Build Coastguard Worker.else
201*c0909341SAndroid Build Coastguard Worker    vsetvli zero, t1, e16, m2, ta, ma
202*c0909341SAndroid Build Coastguard Worker.endif
203*c0909341SAndroid Build Coastguard Worker
204*c0909341SAndroid Build Coastguard Worker    vle8.v v0, (a2)
205*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v2, v0
206*c0909341SAndroid Build Coastguard Worker    vse16.v v2, (a0)
207*c0909341SAndroid Build Coastguard Worker    add a2, a3, a2
208*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0
209*c0909341SAndroid Build Coastguard Worker    addi a5, a5, -1
210*c0909341SAndroid Build Coastguard Worker    bnez a5, 8b
211*c0909341SAndroid Build Coastguard Worker
212*c0909341SAndroid Build Coastguard Worker
/* Bottom rows: copied only when y_end is h + 2, i.e. the bottom edge exists. */
213*c0909341SAndroid Build Coastguard Worker    li a5, \h # a5 = current row index, counts up to y_end
214*c0909341SAndroid Build Coastguard Worker    sh1add a0, t0, a0 # tmp += x_start
215*c0909341SAndroid Build Coastguard Worker    add a6, a6, t0 # bottom += x_start
216*c0909341SAndroid Build Coastguard Worker    beq a5, t3, L(bottom_skip_\w\()x\h)
217*c0909341SAndroid Build Coastguard Worker
218*c0909341SAndroid Build Coastguard Worker    sub t5, t1, t0 # x_end - x_start
219*c0909341SAndroid Build Coastguard Worker.if \w == 4
220*c0909341SAndroid Build Coastguard Worker    vsetvli zero, t5, e16, m1, ta, ma
221*c0909341SAndroid Build Coastguard Worker.else
222*c0909341SAndroid Build Coastguard Worker    vsetvli zero, t5, e16, m2, ta, ma
223*c0909341SAndroid Build Coastguard Worker.endif
224*c0909341SAndroid Build Coastguard Worker
225*c0909341SAndroid Build Coastguard Worker9:
226*c0909341SAndroid Build Coastguard Worker    vle8.v v0, (a6)
227*c0909341SAndroid Build Coastguard Worker    add a6, a3, a6
228*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v2, v0
229*c0909341SAndroid Build Coastguard Worker    addi a5, a5, 1
230*c0909341SAndroid Build Coastguard Worker    vse16.v v2, (a0)
231*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0
232*c0909341SAndroid Build Coastguard Worker    bne a5, t3, 9b
233*c0909341SAndroid Build Coastguard Worker
/* Rewind the pointers that were advanced above to their entry values. */
234*c0909341SAndroid Build Coastguard WorkerL(bottom_skip_\w\()x\h):
235*c0909341SAndroid Build Coastguard Worker    li t6, \h
236*c0909341SAndroid Build Coastguard Worker    mul t6, a3, t6
237*c0909341SAndroid Build Coastguard Worker    sub a2, a2, t6 # src -= h * src_stride
238*c0909341SAndroid Build Coastguard Worker    mul t5, a1, t3
239*c0909341SAndroid Build Coastguard Worker    add t5, t5, t0
240*c0909341SAndroid Build Coastguard Worker    slli t5, t5, 1
241*c0909341SAndroid Build Coastguard Worker    sub a0, a0, t5 # tmp -= y_end * tmp_stride + x_start
242*c0909341SAndroid Build Coastguard Worker.endm
243*c0909341SAndroid Build Coastguard Worker
244*c0909341SAndroid Build Coastguard Worker
245*c0909341SAndroid Build Coastguard Worker.macro cdef_fn w, h
246*c0909341SAndroid Build Coastguard Workerfunction cdef_filter_block_\w\()x\h\()_8bpc_rvv, export=1, ext="v,zba,zbb"
247*c0909341SAndroid Build Coastguard Worker    csrw vxrm, zero
248*c0909341SAndroid Build Coastguard Worker
249*c0909341SAndroid Build Coastguard Worker    addi sp, sp, -32 - 144*2
250*c0909341SAndroid Build Coastguard Worker    sd a5, 24(sp) # pri_strength
251*c0909341SAndroid Build Coastguard Worker    sd a6, 16(sp) # sec_strength
252*c0909341SAndroid Build Coastguard Worker    sd a7, 8(sp) # dir
253*c0909341SAndroid Build Coastguard Worker
254*c0909341SAndroid Build Coastguard Worker
255*c0909341SAndroid Build Coastguard Worker    ld a7, 8 + 32 + 144*2(sp) # edges
256*c0909341SAndroid Build Coastguard Worker    mv a6, a4 # bottom
257*c0909341SAndroid Build Coastguard Worker    mv a5, a3 # top
258*c0909341SAndroid Build Coastguard Worker    mv a4, a2 # left
259*c0909341SAndroid Build Coastguard Worker    mv a3, a1 # dst_stride
260*c0909341SAndroid Build Coastguard Worker    mv a2, a0 # dst
261*c0909341SAndroid Build Coastguard Worker    li a1, 12 # tmp_stride
262*c0909341SAndroid Build Coastguard Worker    addi a0, sp, 32 + 2*(2*12+2)
263*c0909341SAndroid Build Coastguard Worker    padding_fn \w, \h
264*c0909341SAndroid Build Coastguard Worker
265*c0909341SAndroid Build Coastguard Worker    ld a4, 32 + 2*144(sp) # damping
266*c0909341SAndroid Build Coastguard Worker    ld a5, 24(sp) # pri_strength
267*c0909341SAndroid Build Coastguard Worker    ld a6, 16(sp) # sec_strength
268*c0909341SAndroid Build Coastguard Worker    ld a7, 8(sp) # dir
269*c0909341SAndroid Build Coastguard Worker
270*c0909341SAndroid Build Coastguard Worker    beqz a5, cdef_filter_sec_only_\w\()x\h
271*c0909341SAndroid Build Coastguard Worker
272*c0909341SAndroid Build Coastguard Worker    bnez a6, cdef_filter_pri_sec_\w\()x\h
273*c0909341SAndroid Build Coastguard Worker
274*c0909341SAndroid Build Coastguard Worker    andi t0, a5, 1
275*c0909341SAndroid Build Coastguard Worker    li t1, 4
276*c0909341SAndroid Build Coastguard Worker    sub t4, t1, t0
277*c0909341SAndroid Build Coastguard Worker
278*c0909341SAndroid Build Coastguard Worker    li t1, 63
279*c0909341SAndroid Build Coastguard Worker    clz t2, a5
280*c0909341SAndroid Build Coastguard Worker    sub t1, t1, t2
281*c0909341SAndroid Build Coastguard Worker    sub t1, a4, t1
282*c0909341SAndroid Build Coastguard Worker
283*c0909341SAndroid Build Coastguard Worker    li t0, \h
284*c0909341SAndroid Build Coastguard Worker
285*c0909341SAndroid Build Coastguard Worker    la t2, dav1d_cdef_directions
286*c0909341SAndroid Build Coastguard Worker    addi t3, a7, 2
287*c0909341SAndroid Build Coastguard Worker    sh1add t2, t3, t2
288*c0909341SAndroid Build Coastguard Worker
289*c0909341SAndroid Build Coastguard Worker    blt zero, t1, 1f
290*c0909341SAndroid Build Coastguard Worker    mv t1, zero
291*c0909341SAndroid Build Coastguard Worker1:
292*c0909341SAndroid Build Coastguard Worker    vsetivli zero, \w, e16, m1, ta, mu
293*c0909341SAndroid Build Coastguard Worker
294*c0909341SAndroid Build Coastguard Worker    lb t3, 0(t2)
295*c0909341SAndroid Build Coastguard Worker
296*c0909341SAndroid Build Coastguard Worker    vle8.v v0, (a2)
297*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v2, v0
298*c0909341SAndroid Build Coastguard Worker
299*c0909341SAndroid Build Coastguard Worker    sh1add t6, t3, a0
300*c0909341SAndroid Build Coastguard Worker    slli t3, t3, 1
301*c0909341SAndroid Build Coastguard Worker    sub t3, a0, t3
302*c0909341SAndroid Build Coastguard Worker
303*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (t6)
304*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t3)
305*c0909341SAndroid Build Coastguard Worker
306*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
307*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
308*c0909341SAndroid Build Coastguard Worker
309*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
310*c0909341SAndroid Build Coastguard Worker
311*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a5, t1, v8, v16
312*c0909341SAndroid Build Coastguard Worker
313*c0909341SAndroid Build Coastguard Worker    vmul.vx v28, v16, t4
314*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t4, v8
315*c0909341SAndroid Build Coastguard Worker
316*c0909341SAndroid Build Coastguard Worker    lb t3, 1(t2)
317*c0909341SAndroid Build Coastguard Worker
318*c0909341SAndroid Build Coastguard Worker    andi t5, t4, 3
319*c0909341SAndroid Build Coastguard Worker    ori t5, t5, 2
320*c0909341SAndroid Build Coastguard Worker
321*c0909341SAndroid Build Coastguard Worker    sh1add t6, t3, a0
322*c0909341SAndroid Build Coastguard Worker    slli t3, t3, 1
323*c0909341SAndroid Build Coastguard Worker    sub t3, a0, t3
324*c0909341SAndroid Build Coastguard Worker
325*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, mu
326*c0909341SAndroid Build Coastguard Worker
327*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (t6)
328*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t3)
329*c0909341SAndroid Build Coastguard Worker
330*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
331*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
332*c0909341SAndroid Build Coastguard Worker
333*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
334*c0909341SAndroid Build Coastguard Worker
335*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a5, t1, v8, v16
336*c0909341SAndroid Build Coastguard Worker
337*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t5, v16
338*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t5, v8
339*c0909341SAndroid Build Coastguard Worker
340*c0909341SAndroid Build Coastguard Worker    vmslt.vx v0, v28, zero
341*c0909341SAndroid Build Coastguard Worker    vadd.vi v28, v28, -1, v0.t
342*c0909341SAndroid Build Coastguard Worker
343*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
344*c0909341SAndroid Build Coastguard Worker
345*c0909341SAndroid Build Coastguard Worker    vnclip.wi v24, v28, 4
346*c0909341SAndroid Build Coastguard Worker
347*c0909341SAndroid Build Coastguard Worker    vadd.vv v28, v2, v24
348*c0909341SAndroid Build Coastguard Worker
349*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
350*c0909341SAndroid Build Coastguard Worker
351*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v24, v28, 0
352*c0909341SAndroid Build Coastguard Worker
353*c0909341SAndroid Build Coastguard Worker    vse8.v v24, (a2)
354*c0909341SAndroid Build Coastguard Worker
355*c0909341SAndroid Build Coastguard Worker    addi t0, t0, -1
356*c0909341SAndroid Build Coastguard Worker    add a2, a2, a3
357*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0
358*c0909341SAndroid Build Coastguard Worker
359*c0909341SAndroid Build Coastguard Worker    bnez t0, 1b
360*c0909341SAndroid Build Coastguard Worker
361*c0909341SAndroid Build Coastguard Worker    addi sp, sp, 32 + 144*2
362*c0909341SAndroid Build Coastguard Worker    ret
363*c0909341SAndroid Build Coastguard Worker
364*c0909341SAndroid Build Coastguard Workercdef_filter_sec_only_\w\()x\h:
365*c0909341SAndroid Build Coastguard Worker    li t1, 63
366*c0909341SAndroid Build Coastguard Worker    clz t2, a6
367*c0909341SAndroid Build Coastguard Worker    sub t1, t1, t2
368*c0909341SAndroid Build Coastguard Worker    sub t1, a4, t1
369*c0909341SAndroid Build Coastguard Worker
370*c0909341SAndroid Build Coastguard Worker    li t0, \h
371*c0909341SAndroid Build Coastguard Worker
372*c0909341SAndroid Build Coastguard Worker    la t2, dav1d_cdef_directions
373*c0909341SAndroid Build Coastguard Worker    addi t3, a7, 4
374*c0909341SAndroid Build Coastguard Worker    sh1add t3, t3, t2
375*c0909341SAndroid Build Coastguard Worker    sh1add t2, a7, t2
376*c0909341SAndroid Build Coastguard Worker
377*c0909341SAndroid Build Coastguard Worker2:
378*c0909341SAndroid Build Coastguard Worker    vsetivli zero, \w, e16, m1, ta, mu
379*c0909341SAndroid Build Coastguard Worker
380*c0909341SAndroid Build Coastguard Worker    lb t4, 0(t3)
381*c0909341SAndroid Build Coastguard Worker    lb t5, 0(t2)
382*c0909341SAndroid Build Coastguard Worker
383*c0909341SAndroid Build Coastguard Worker    vle8.v v0, (a2)
384*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v2, v0
385*c0909341SAndroid Build Coastguard Worker
386*c0909341SAndroid Build Coastguard Worker    sh1add t6, t4, a0
387*c0909341SAndroid Build Coastguard Worker    slli t4, t4, 1
388*c0909341SAndroid Build Coastguard Worker    sub t4, a0, t4
389*c0909341SAndroid Build Coastguard Worker
390*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (t6)
391*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t4)
392*c0909341SAndroid Build Coastguard Worker
393*c0909341SAndroid Build Coastguard Worker    sh1add t4, t5, a0
394*c0909341SAndroid Build Coastguard Worker    slli t5, t5, 1
395*c0909341SAndroid Build Coastguard Worker    sub t5, a0, t5
396*c0909341SAndroid Build Coastguard Worker
397*c0909341SAndroid Build Coastguard Worker    vle16.v v8, (t4)
398*c0909341SAndroid Build Coastguard Worker    vle16.v v10, (t5)
399*c0909341SAndroid Build Coastguard Worker
400*c0909341SAndroid Build Coastguard Worker    vwsub.vv v12, v4, v2
401*c0909341SAndroid Build Coastguard Worker    vwsub.vv v14, v6, v2
402*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v8, v2
403*c0909341SAndroid Build Coastguard Worker    vwsub.vv v18, v10, v2
404*c0909341SAndroid Build Coastguard Worker
405*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
406*c0909341SAndroid Build Coastguard Worker
407*c0909341SAndroid Build Coastguard Worker    li t4, 2
408*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a6, t1, v12, v14
409*c0909341SAndroid Build Coastguard Worker    constrain_vectors v8, v10, v14, a6, t1, v16, v18
410*c0909341SAndroid Build Coastguard Worker
411*c0909341SAndroid Build Coastguard Worker    vmul.vx v28, v18, t4
412*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t4, v16
413*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t4, v14
414*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t4, v12
415*c0909341SAndroid Build Coastguard Worker
416*c0909341SAndroid Build Coastguard Worker
417*c0909341SAndroid Build Coastguard Worker    lb t4, 1(t3)
418*c0909341SAndroid Build Coastguard Worker    lb t5, 1(t2)
419*c0909341SAndroid Build Coastguard Worker
420*c0909341SAndroid Build Coastguard Worker    sh1add t6, t4, a0
421*c0909341SAndroid Build Coastguard Worker    slli t4, t4, 1
422*c0909341SAndroid Build Coastguard Worker    sub t4, a0, t4
423*c0909341SAndroid Build Coastguard Worker
424*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, mu
425*c0909341SAndroid Build Coastguard Worker
426*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (t6)
427*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t4)
428*c0909341SAndroid Build Coastguard Worker
429*c0909341SAndroid Build Coastguard Worker    sh1add t4, t5, a0
430*c0909341SAndroid Build Coastguard Worker    slli t5, t5, 1
431*c0909341SAndroid Build Coastguard Worker    sub t5, a0, t5
432*c0909341SAndroid Build Coastguard Worker
433*c0909341SAndroid Build Coastguard Worker    vle16.v v8, (t4)
434*c0909341SAndroid Build Coastguard Worker    vle16.v v10, (t5)
435*c0909341SAndroid Build Coastguard Worker
436*c0909341SAndroid Build Coastguard Worker    vwsub.vv v12, v4, v2
437*c0909341SAndroid Build Coastguard Worker    vwsub.vv v14, v6, v2
438*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v8, v2
439*c0909341SAndroid Build Coastguard Worker    vwsub.vv v18, v10, v2
440*c0909341SAndroid Build Coastguard Worker
441*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
442*c0909341SAndroid Build Coastguard Worker
443*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a6, t1, v12, v14
444*c0909341SAndroid Build Coastguard Worker    constrain_vectors v8, v10, v14, a6, t1, v16, v18
445*c0909341SAndroid Build Coastguard Worker
446*c0909341SAndroid Build Coastguard Worker    vadd.vv v4, v28, v12
447*c0909341SAndroid Build Coastguard Worker    vadd.vv v28, v4, v14
448*c0909341SAndroid Build Coastguard Worker    vadd.vv v4, v28, v16
449*c0909341SAndroid Build Coastguard Worker    vadd.vv v28, v4, v18
450*c0909341SAndroid Build Coastguard Worker
451*c0909341SAndroid Build Coastguard Worker    vmslt.vx v0, v28, zero
452*c0909341SAndroid Build Coastguard Worker    vadd.vi v28, v28, -1, v0.t
453*c0909341SAndroid Build Coastguard Worker
454*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
455*c0909341SAndroid Build Coastguard Worker
456*c0909341SAndroid Build Coastguard Worker    vnclip.wi v24, v28, 4
457*c0909341SAndroid Build Coastguard Worker
458*c0909341SAndroid Build Coastguard Worker    vadd.vv v28, v2, v24
459*c0909341SAndroid Build Coastguard Worker
460*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
461*c0909341SAndroid Build Coastguard Worker
462*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v24, v28, 0
463*c0909341SAndroid Build Coastguard Worker
464*c0909341SAndroid Build Coastguard Worker    vse8.v v24, (a2)
465*c0909341SAndroid Build Coastguard Worker
466*c0909341SAndroid Build Coastguard Worker    addi t0, t0, -1
467*c0909341SAndroid Build Coastguard Worker    add a2, a2, a3
468*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0
469*c0909341SAndroid Build Coastguard Worker
470*c0909341SAndroid Build Coastguard Worker    bnez t0, 2b
471*c0909341SAndroid Build Coastguard Worker
472*c0909341SAndroid Build Coastguard Worker    addi sp, sp, 32 + 144*2
473*c0909341SAndroid Build Coastguard Worker    ret
/*
 * cdef_filter_pri_sec_<w>x<h>: filters a block of w x h 8-bit pixels in
 * place, one row per loop iteration, using both strength values (a5 and a6).
 * w and h are the arguments of the enclosing macro.
 *
 * Registers as used by this section:
 *   a0  base of the current 16-bit source row; advanced by 2*a1 bytes per row
 *   a1  row stride of a0, counted in 16-bit elements
 *   a2  current 8-bit pixel row: loaded as the centre pixels, then overwritten
 *       with the filtered result; advanced by a3 per row
 *   a3  byte stride of a2
 *   a4  on entry, the value the log2 terms are subtracted from (presumably
 *       the damping - confirm against the caller); afterwards reused as an
 *       address scratch register
 *   a5  strength handed to constrain_vectors for the first two tap pairs
 *       (presumably the primary strength)
 *   a6  strength handed to constrain_vectors for the remaining four tap pairs
 *       (presumably the secondary strength)
 *   a7  index into dav1d_cdef_directions; each entry is two signed bytes
 *   t0  rows left to process
 *   t1  max(0, a4 - floor(log2(a5))), shift used together with a5
 *   t2  a4 - floor(log2(a6)), shift used together with a6 (not clamped)
 *   t3  address of dav1d_cdef_directions
 *   v2  centre pixels widened to 16 bits
 *   v20 running unsigned minimum, v24 running signed maximum of the centre
 *       pixels and every tap that has been loaded
 *   v28 32-bit accumulator for the weighted sum
 *
 * NOTE(review): constrain_vectors is defined outside this section. From its
 * use here it leaves its results in the last two operands (v8 and v16), and
 * v4/v6 are always reloaded after it; presumably v4, v6 and v12 are scratch -
 * confirm against the macro definition.
 */
474*c0909341SAndroid Build Coastguard Workercdef_filter_pri_sec_\w\()x\h:
475*c0909341SAndroid Build Coastguard Worker
/* 63 - clz(x) is floor(log2(x)) of a non-zero 64-bit value. */
476*c0909341SAndroid Build Coastguard Worker    li t1, 63
477*c0909341SAndroid Build Coastguard Worker    clz t2, a5
478*c0909341SAndroid Build Coastguard Worker    clz t3, a6
479*c0909341SAndroid Build Coastguard Worker    sub t2, t1, t2
480*c0909341SAndroid Build Coastguard Worker    sub t3, t1, t3
/* t1 = a4 - log2(a5), t2 = a4 - log2(a6). */
481*c0909341SAndroid Build Coastguard Worker    sub t1, a4, t2
482*c0909341SAndroid Build Coastguard Worker    sub t2, a4, t3
483*c0909341SAndroid Build Coastguard Worker
/* Row counter. */
484*c0909341SAndroid Build Coastguard Worker    li t0, \h
485*c0909341SAndroid Build Coastguard Worker
486*c0909341SAndroid Build Coastguard Worker    la t3, dav1d_cdef_directions
487*c0909341SAndroid Build Coastguard Worker
/* Clamp t1 to be non-negative: the mv is skipped when t1 > 0. */
488*c0909341SAndroid Build Coastguard Worker    blt zero, t1, 3f
489*c0909341SAndroid Build Coastguard Worker    mv t1, zero
/* Label 3 is also the head of the per-row loop. */
490*c0909341SAndroid Build Coastguard Worker3:
/* vl = w, 16-bit elements. Re-issued every row because the row ends in e8/mf2. */
491*c0909341SAndroid Build Coastguard Worker    vsetivli zero, \w, e16, m1, ta, ma
492*c0909341SAndroid Build Coastguard Worker
/* t4 = 4 - (a5 & 1): weight of the first tap pair. t5 = &table[a7 + 2]. */
493*c0909341SAndroid Build Coastguard Worker    li t4, 4
494*c0909341SAndroid Build Coastguard Worker    andi t6, a5, 1
495*c0909341SAndroid Build Coastguard Worker    addi t5, a7, 2
496*c0909341SAndroid Build Coastguard Worker    sub t4, t4, t6
497*c0909341SAndroid Build Coastguard Worker
498*c0909341SAndroid Build Coastguard Worker    sh1add t5, t5, t3
499*c0909341SAndroid Build Coastguard Worker
/* Load the w centre pixels as bytes; they are zero-extended into v2 below. */
500*c0909341SAndroid Build Coastguard Worker    vle8.v v0, (a2)
501*c0909341SAndroid Build Coastguard Worker
/* Signed element offset of the first tap for table entry a7 + 2. */
502*c0909341SAndroid Build Coastguard Worker    lb t6, 0(t5)
503*c0909341SAndroid Build Coastguard Worker
504*c0909341SAndroid Build Coastguard Worker    vzext.vf2 v2, v0
505*c0909341SAndroid Build Coastguard Worker
/* a4 = a0 + 2*offset, t6 = a0 - 2*offset: the tap and its mirror image. */
506*c0909341SAndroid Build Coastguard Worker    sh1add a4, t6, a0
507*c0909341SAndroid Build Coastguard Worker    slli t6, t6, 1
508*c0909341SAndroid Build Coastguard Worker    sub t6, a0, t6
509*c0909341SAndroid Build Coastguard Worker
510*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (a4)
511*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t6)
512*c0909341SAndroid Build Coastguard Worker
/*
 * Start the running bounds from the centre pixels and the first tap pair.
 * The minimum is unsigned and the maximum is signed, so a 16-bit sample with
 * its top bit set can neither lower the minimum nor raise the maximum.
 * NOTE(review): presumably this is how padding samples are kept out of the
 * clamp range - confirm against the code that fills the 16-bit buffer.
 */
513*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v4, v2
514*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v4, v2
515*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v6, v20
516*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v6, v24
517*c0909341SAndroid Build Coastguard Worker
/* Widening differences tap - centre, 32 bits wide, in v8 and v16. */
518*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
519*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
520*c0909341SAndroid Build Coastguard Worker
/* Work on the 32-bit differences; mask-undisturbed policy from here. */
521*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
522*c0909341SAndroid Build Coastguard Worker
523*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a5, t1, v8, v16
524*c0909341SAndroid Build Coastguard Worker
/* Accumulator = t4 * (v16 + v8). */
525*c0909341SAndroid Build Coastguard Worker    vmul.vx v28, v16, t4
526*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t4, v8
527*c0909341SAndroid Build Coastguard Worker
/* Second tap of table entry a7 + 2. */
528*c0909341SAndroid Build Coastguard Worker    lb t6, 1(t5)
529*c0909341SAndroid Build Coastguard Worker
/* Weight of the second tap pair: (t4 & 3) | 2, i.e. 4 -> 2 and 3 -> 3. */
530*c0909341SAndroid Build Coastguard Worker    andi t4, t4, 3
531*c0909341SAndroid Build Coastguard Worker    ori t4, t4, 2
532*c0909341SAndroid Build Coastguard Worker
533*c0909341SAndroid Build Coastguard Worker
534*c0909341SAndroid Build Coastguard Worker    sh1add a4, t6, a0
535*c0909341SAndroid Build Coastguard Worker    slli t6, t6, 1
536*c0909341SAndroid Build Coastguard Worker    sub t6, a0, t6
537*c0909341SAndroid Build Coastguard Worker
538*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
539*c0909341SAndroid Build Coastguard Worker
540*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (a4)
541*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t6)
542*c0909341SAndroid Build Coastguard Worker
543*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v4, v20
544*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v4, v24
545*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v6, v20
546*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v6, v24
547*c0909341SAndroid Build Coastguard Worker
548*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
549*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
550*c0909341SAndroid Build Coastguard Worker
551*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
552*c0909341SAndroid Build Coastguard Worker
553*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a5, t1, v8, v16
554*c0909341SAndroid Build Coastguard Worker
/* Accumulate the second pair; t5 is then moved on to table entry a7 + 4. */
555*c0909341SAndroid Build Coastguard Worker    addi t5, a7, 4
556*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t4, v16
557*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t4, v8
558*c0909341SAndroid Build Coastguard Worker
559*c0909341SAndroid Build Coastguard Worker    sh1add t5, t5, t3
560*c0909341SAndroid Build Coastguard Worker
/* From here on the taps use strength a6 and shift t2. First tap of entry a7 + 4. */
561*c0909341SAndroid Build Coastguard Worker    lb t6, 0(t5)
562*c0909341SAndroid Build Coastguard Worker
563*c0909341SAndroid Build Coastguard Worker    sh1add a4, t6, a0
564*c0909341SAndroid Build Coastguard Worker    slli t6, t6, 1
565*c0909341SAndroid Build Coastguard Worker    sub t6, a0, t6
566*c0909341SAndroid Build Coastguard Worker
567*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
568*c0909341SAndroid Build Coastguard Worker
569*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (a4)
570*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t6)
571*c0909341SAndroid Build Coastguard Worker
572*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v4, v20
573*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v4, v24
574*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v6, v20
575*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v6, v24
576*c0909341SAndroid Build Coastguard Worker
577*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
578*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
579*c0909341SAndroid Build Coastguard Worker
580*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
581*c0909341SAndroid Build Coastguard Worker
/* t6 is free again (both addresses were consumed): weight 2 for this pair. */
582*c0909341SAndroid Build Coastguard Worker    li t6, 2
583*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a6, t2, v8, v16
584*c0909341SAndroid Build Coastguard Worker
585*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t6, v16
586*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t6, v8
587*c0909341SAndroid Build Coastguard Worker
/* Second tap of entry a7 + 4; added below with weight 1. */
588*c0909341SAndroid Build Coastguard Worker    lb t6, 1(t5)
589*c0909341SAndroid Build Coastguard Worker
590*c0909341SAndroid Build Coastguard Worker    sh1add a4, t6, a0
591*c0909341SAndroid Build Coastguard Worker    slli t6, t6, 1
592*c0909341SAndroid Build Coastguard Worker    sub t6, a0, t6
593*c0909341SAndroid Build Coastguard Worker
594*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
595*c0909341SAndroid Build Coastguard Worker
596*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (a4)
597*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t6)
598*c0909341SAndroid Build Coastguard Worker
599*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v4, v20
600*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v4, v24
601*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v6, v20
602*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v6, v24
603*c0909341SAndroid Build Coastguard Worker
604*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
605*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
606*c0909341SAndroid Build Coastguard Worker
607*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
608*c0909341SAndroid Build Coastguard Worker
609*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a6, t2, v8, v16
610*c0909341SAndroid Build Coastguard Worker
/* t5 = &table[a7] for the last two tap pairs. */
611*c0909341SAndroid Build Coastguard Worker    sh1add t5, a7, t3
612*c0909341SAndroid Build Coastguard Worker
/* Weight 1: plain add of both constrained differences (v4 is a temporary). */
613*c0909341SAndroid Build Coastguard Worker    vadd.vv v4, v28, v8
614*c0909341SAndroid Build Coastguard Worker    vadd.vv v28, v4, v16
615*c0909341SAndroid Build Coastguard Worker
616*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
617*c0909341SAndroid Build Coastguard Worker
/* First tap of entry a7. */
618*c0909341SAndroid Build Coastguard Worker    lb t6, 0(t5)
619*c0909341SAndroid Build Coastguard Worker
620*c0909341SAndroid Build Coastguard Worker    sh1add a4, t6, a0
621*c0909341SAndroid Build Coastguard Worker    slli t6, t6, 1
622*c0909341SAndroid Build Coastguard Worker    sub t6, a0, t6
623*c0909341SAndroid Build Coastguard Worker
624*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (a4)
625*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t6)
626*c0909341SAndroid Build Coastguard Worker
627*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v4, v20
628*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v4, v24
629*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v6, v20
630*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v6, v24
631*c0909341SAndroid Build Coastguard Worker
632*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
633*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
634*c0909341SAndroid Build Coastguard Worker
635*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
636*c0909341SAndroid Build Coastguard Worker
/* Weight 2 again for this pair. */
637*c0909341SAndroid Build Coastguard Worker    li t6, 2
638*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a6, t2, v8, v16
639*c0909341SAndroid Build Coastguard Worker
640*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t6, v16
641*c0909341SAndroid Build Coastguard Worker    vmacc.vx v28, t6, v8
642*c0909341SAndroid Build Coastguard Worker
/* Second tap of entry a7; added below with weight 1. */
643*c0909341SAndroid Build Coastguard Worker    lb t6, 1(t5)
644*c0909341SAndroid Build Coastguard Worker
645*c0909341SAndroid Build Coastguard Worker    sh1add a4, t6, a0
646*c0909341SAndroid Build Coastguard Worker    slli t6, t6, 1
647*c0909341SAndroid Build Coastguard Worker    sub t6, a0, t6
648*c0909341SAndroid Build Coastguard Worker
649*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, ma
650*c0909341SAndroid Build Coastguard Worker
651*c0909341SAndroid Build Coastguard Worker    vle16.v v4, (a4)
652*c0909341SAndroid Build Coastguard Worker    vle16.v v6, (t6)
653*c0909341SAndroid Build Coastguard Worker
654*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v4, v20
655*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v4, v24
656*c0909341SAndroid Build Coastguard Worker    vminu.vv v20, v6, v20
657*c0909341SAndroid Build Coastguard Worker    vmax.vv v24, v6, v24
658*c0909341SAndroid Build Coastguard Worker
659*c0909341SAndroid Build Coastguard Worker    vwsub.vv v8, v4, v2
660*c0909341SAndroid Build Coastguard Worker    vwsub.vv v16, v6, v2
661*c0909341SAndroid Build Coastguard Worker
662*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e32, m2, ta, mu
663*c0909341SAndroid Build Coastguard Worker
664*c0909341SAndroid Build Coastguard Worker    constrain_vectors v4, v6, v12, a6, t2, v8, v16
665*c0909341SAndroid Build Coastguard Worker
666*c0909341SAndroid Build Coastguard Worker    vadd.vv v4, v28, v8
667*c0909341SAndroid Build Coastguard Worker    vadd.vv v28, v4, v16
668*c0909341SAndroid Build Coastguard Worker
/*
 * Subtract 1 from every negative sum. The add is masked by v0 and the mask
 * policy is "mu", so non-negative sums are left untouched.
 */
669*c0909341SAndroid Build Coastguard Worker    vmslt.vx v0, v28, zero
670*c0909341SAndroid Build Coastguard Worker    vadd.vi v28, v28, -1, v0.t
671*c0909341SAndroid Build Coastguard Worker
672*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e16, m1, ta, mu
673*c0909341SAndroid Build Coastguard Worker
/*
 * Narrowing arithmetic shift right by 4, 32 -> 16 bits with signed
 * saturation. The rounding applied is whatever vxrm selects; vxrm is not
 * written in this section.
 * NOTE(review): with round-to-nearest-up this and the -1 above amount to
 * (8 + sum - (sum < 0)) >> 4 - confirm where vxrm is set.
 */
674*c0909341SAndroid Build Coastguard Worker    vnclip.wi v16, v28, 4
675*c0909341SAndroid Build Coastguard Worker
/* Filtered value = centre pixel + rounded sum. */
676*c0909341SAndroid Build Coastguard Worker    vadd.vv v28, v2, v16
677*c0909341SAndroid Build Coastguard Worker
/* v4 = signed max(v28, v20): lower clamp against the running minimum. */
678*c0909341SAndroid Build Coastguard Worker    vmslt.vv v0, v20, v28
679*c0909341SAndroid Build Coastguard Worker    vmerge.vvm v4, v20, v28, v0
680*c0909341SAndroid Build Coastguard Worker
/* v28 = signed min(v4, v24): upper clamp against the running maximum. */
681*c0909341SAndroid Build Coastguard Worker    vmslt.vv v0, v4, v24
682*c0909341SAndroid Build Coastguard Worker    vmerge.vvm v28, v24, v4, v0
683*c0909341SAndroid Build Coastguard Worker
684*c0909341SAndroid Build Coastguard Worker    vsetvli zero, zero, e8, mf2, ta, ma
685*c0909341SAndroid Build Coastguard Worker
/* Narrow 16 -> 8 bits with unsigned saturation (shift of 0) and store the row. */
686*c0909341SAndroid Build Coastguard Worker    vnclipu.wi v24, v28, 0
687*c0909341SAndroid Build Coastguard Worker
688*c0909341SAndroid Build Coastguard Worker    vse8.v v24, (a2)
689*c0909341SAndroid Build Coastguard Worker
/* Next row: a2 += a3 bytes, a0 += 2*a1 bytes. */
690*c0909341SAndroid Build Coastguard Worker    addi t0, t0, -1
691*c0909341SAndroid Build Coastguard Worker    add a2, a2, a3
692*c0909341SAndroid Build Coastguard Worker    sh1add a0, a1, a0
693*c0909341SAndroid Build Coastguard Worker
694*c0909341SAndroid Build Coastguard Worker    bnez t0, 3b
695*c0909341SAndroid Build Coastguard Worker
/* Release 32 + 144*2 bytes of stack; the matching allocation is outside this section. */
696*c0909341SAndroid Build Coastguard Worker    addi sp, sp, 32 + 144*2
697*c0909341SAndroid Build Coastguard Worker    ret
698*c0909341SAndroid Build Coastguard Workerendfunc
699*c0909341SAndroid Build Coastguard Worker.endm
700*c0909341SAndroid Build Coastguard Worker
/*
 * Expand the cdef_fn macro once per supported block size. The two arguments
 * are substituted for the macro's w and h parameters.
 * NOTE(review): the .macro line is not visible here; presumably the order is
 * (w, h), giving the 4x4, 4x8 and 8x8 variants - confirm against the macro
 * header.
 */
701*c0909341SAndroid Build Coastguard Workercdef_fn 4, 4
702*c0909341SAndroid Build Coastguard Workercdef_fn 4, 8
703*c0909341SAndroid Build Coastguard Workercdef_fn 8, 8
704