xref: /aosp_15_r20/external/libdav1d/src/loongarch/looprestoration.S (revision c09093415860a1c2373dacd84c4fde00c507cdfd)
1*c0909341SAndroid Build Coastguard Worker/*
2*c0909341SAndroid Build Coastguard Worker * Copyright © 2023, VideoLAN and dav1d authors
3*c0909341SAndroid Build Coastguard Worker * Copyright © 2023, Loongson Technology Corporation Limited
4*c0909341SAndroid Build Coastguard Worker * All rights reserved.
5*c0909341SAndroid Build Coastguard Worker *
6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without
7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met:
8*c0909341SAndroid Build Coastguard Worker *
9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this
10*c0909341SAndroid Build Coastguard Worker *    list of conditions and the following disclaimer.
11*c0909341SAndroid Build Coastguard Worker *
12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice,
13*c0909341SAndroid Build Coastguard Worker *    this list of conditions and the following disclaimer in the documentation
14*c0909341SAndroid Build Coastguard Worker *    and/or other materials provided with the distribution.
15*c0909341SAndroid Build Coastguard Worker *
16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26*c0909341SAndroid Build Coastguard Worker */
27*c0909341SAndroid Build Coastguard Worker
28*c0909341SAndroid Build Coastguard Worker#include "src/loongarch/loongson_asm.S"
29*c0909341SAndroid Build Coastguard Worker
// Row stride, counted in elements, of the buffers walked by the routines in
// this file. Each use scales it by the element size: it is used as-is for
// byte rows, shifted left by 1 for 16-bit rows and by 2 for 32-bit rows.
30*c0909341SAndroid Build Coastguard Worker#define REST_UNIT_STRIDE (400)
31*c0909341SAndroid Build Coastguard Worker
// MADD_HU_BU in0, in1, out0, out1
//   Zero-extends the 16 unsigned bytes of in0 to halfwords and
//   multiply-accumulates them with the halfword lanes of in1:
//     out0 += zero_extend(in0 bytes 0..7)  * in1
//     out1 += zero_extend(in0 bytes 8..15) * in1
//   The accumulation happens in 16-bit lanes (vmadd.h).
//   Clobbers vr12 and vr13, so neither may be used for in1, out0 or out1.
32*c0909341SAndroid Build Coastguard Worker.macro MADD_HU_BU in0, in1, out0, out1
33*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr12,     \in0,     0
34*c0909341SAndroid Build Coastguard Worker    vexth.hu.bu   vr13,     \in0
35*c0909341SAndroid Build Coastguard Worker    vmadd.h       \out0,    vr12,     \in1
36*c0909341SAndroid Build Coastguard Worker    vmadd.h       \out1,    vr13,     \in1
37*c0909341SAndroid Build Coastguard Worker.endm
38*c0909341SAndroid Build Coastguard Worker
// Byte-shuffle control vector for vshuf.b: picks bytes 3..18 out of a pair of
// 16-byte source vectors (indices 16 and above address the second vector).
// wiener_filter_h_8bpc_lsx adds or subtracts 1..3 to these indices to build
// the other six tap offsets.
39*c0909341SAndroid Build Coastguard Workerconst wiener_shuf
40*c0909341SAndroid Build Coastguard Worker.byte 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
41*c0909341SAndroid Build Coastguard Workerendconst
42*c0909341SAndroid Build Coastguard Worker
43*c0909341SAndroid Build Coastguard Worker/*
44*c0909341SAndroid Build Coastguard Workervoid wiener_filter_h_lsx(int32_t *hor_ptr,
45*c0909341SAndroid Build Coastguard Worker                         uint8_t *tmp_ptr,
46*c0909341SAndroid Build Coastguard Worker                         const int16_t filterh[8],
47*c0909341SAndroid Build Coastguard Worker                         const int w, const int h)
48*c0909341SAndroid Build Coastguard Worker*/
// wiener_filter_h_8bpc_lsx: horizontal 7-tap pass of the Wiener filter (8 bpc).
//
// Register arguments, following the prototype comment above:
//   a0 = hor_ptr  int32_t output rows, REST_UNIT_STRIDE elements apart
//   a1 = tmp_ptr  uint8_t input rows, REST_UNIT_STRIDE bytes apart
//   a2 = filterh  eight int16_t values are loaded, taps 0..6 are used
//   a3 = w, a4 = h
//
// Per output element x of a row:
//   acc16  = sum over k = 0..6 of src[x + k] * filterh[k]   (16-bit lanes)
//   out[x] = clip((sext32(acc16) + (src[x + 3] << 7) + (1 << 14)) >>r 3,
//                 0, (1 << 13) - 1)
// where >>r is the rounding arithmetic shift done by vsrari.w.
//
// Both loops test their counter at the bottom, so at least one row and one
// group of 16 columns is always processed. Every inner iteration reads
// 32 source bytes and writes 64 output bytes regardless of how many columns
// remain, so the caller has to provide that much accessible space.
49*c0909341SAndroid Build Coastguard Workerfunction wiener_filter_h_8bpc_lsx
    // Spill f24-f28, the low 64 bits of vr24-vr28. vr24-vr27 are overwritten
    // with filter taps below; vr28 is not touched in this function.
50*c0909341SAndroid Build Coastguard Worker    addi.d        sp,       sp,       -40
51*c0909341SAndroid Build Coastguard Worker    fst.d         f24,      sp,       0
52*c0909341SAndroid Build Coastguard Worker    fst.d         f25,      sp,       8
53*c0909341SAndroid Build Coastguard Worker    fst.d         f26,      sp,       16
54*c0909341SAndroid Build Coastguard Worker    fst.d         f27,      sp,       24
55*c0909341SAndroid Build Coastguard Worker    fst.d         f28,      sp,       32
56*c0909341SAndroid Build Coastguard Worker    li.w          t7,       1<<14          // bias added to every sum; the clip bound is derived from it
57*c0909341SAndroid Build Coastguard Worker
58*c0909341SAndroid Build Coastguard Worker    la.local      t1,       wiener_shuf
59*c0909341SAndroid Build Coastguard Worker    vld           vr4,      t1,       0    // shuffle indices 3..18
60*c0909341SAndroid Build Coastguard Worker    vld           vr14,     a2,       0    // filter[0][k]
    // Broadcast taps 0..6 to all halfword lanes of vr21..vr27.
61*c0909341SAndroid Build Coastguard Worker    vreplvei.h    vr21,     vr14,     0
62*c0909341SAndroid Build Coastguard Worker    vreplvei.h    vr22,     vr14,     1
63*c0909341SAndroid Build Coastguard Worker    vreplvei.h    vr23,     vr14,     2
64*c0909341SAndroid Build Coastguard Worker    vreplvei.h    vr24,     vr14,     3
65*c0909341SAndroid Build Coastguard Worker    vreplvei.h    vr25,     vr14,     4
66*c0909341SAndroid Build Coastguard Worker    vreplvei.h    vr26,     vr14,     5
67*c0909341SAndroid Build Coastguard Worker    vreplvei.h    vr27,     vr14,     6
68*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr0,      t7             // vr0 = 1 << 14 in every word lane
69*c0909341SAndroid Build Coastguard Worker
    // Row loop: a0/a1 point at the start of the current output/input row.
70*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_H_H:
71*c0909341SAndroid Build Coastguard Worker    addi.w        a4,       a4,       -1    // h
72*c0909341SAndroid Build Coastguard Worker    addi.w        t0,       a3,       0     // w
73*c0909341SAndroid Build Coastguard Worker    addi.d        t1,       a1,       0     // tmp_ptr
74*c0909341SAndroid Build Coastguard Worker    addi.d        t2,       a0,       0     // hor_ptr
75*c0909341SAndroid Build Coastguard Worker
    // Column loop: 16 outputs per iteration.
76*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_H_W:
77*c0909341SAndroid Build Coastguard Worker    addi.w        t0,       t0,       -16
78*c0909341SAndroid Build Coastguard Worker    vld           vr5,      t1,       0     // source bytes 0..15 (tap 0 window)
79*c0909341SAndroid Build Coastguard Worker    vld           vr13,     t1,       16    // source bytes 16..31
80*c0909341SAndroid Build Coastguard Worker
    // Build the windows for taps 1..6 by shuffling the 32 loaded bytes with
    // wiener_shuf offset by -2..+3. vr14/vr15 are scratch index vectors here.
81*c0909341SAndroid Build Coastguard Worker    vsubi.bu      vr14,     vr4,      2
82*c0909341SAndroid Build Coastguard Worker    vsubi.bu      vr15,     vr4,      1
83*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr6,      vr13,     vr5,     vr14  // 1 ... 8, 9 ... 16
84*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr7,      vr13,     vr5,     vr15  // 2 ... 9, 10 ... 17
85*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr8,      vr13,     vr5,     vr4   // 3 ... 10, 11 ... 18
86*c0909341SAndroid Build Coastguard Worker    vaddi.bu      vr14,     vr4,      1
87*c0909341SAndroid Build Coastguard Worker    vaddi.bu      vr15,     vr4,      2
88*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr9,      vr13,     vr5,     vr14  // 4 ... 11, 12 ... 19
89*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr10,     vr13,     vr5,     vr15  // 5 ... 12, 13 ... 20
90*c0909341SAndroid Build Coastguard Worker    vaddi.bu      vr14,     vr4,      3
91*c0909341SAndroid Build Coastguard Worker    vshuf.b       vr11,     vr13,     vr5,     vr14  // 6 ... 13, 14 ... 21
92*c0909341SAndroid Build Coastguard Worker
    // vr17..vr20 = centre pixel (offset 3) widened to 32 bits and shifted
    // left by 7. The low halves get the shift from vsllwil, the high halves
    // from the explicit vslli.w.
93*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr15,     vr8,      0    //  3  4  5  6  7  8  9 10
94*c0909341SAndroid Build Coastguard Worker    vexth.hu.bu   vr16,     vr8            // 11 12 13 14 15 16 17 18
95*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr17,     vr15,     7    //  3  4  5  6
96*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr18,     vr15           //  7  8  9 10
97*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr19,     vr16,     7    // 11 12 13 14
98*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr20,     vr16           // 15 16 17 18
99*c0909341SAndroid Build Coastguard Worker    vslli.w       vr18,     vr18,     7
100*c0909341SAndroid Build Coastguard Worker    vslli.w       vr20,     vr20,     7
    // Clear the two 16-bit accumulators (outputs 0..7 and 8..15).
101*c0909341SAndroid Build Coastguard Worker    vxor.v        vr15,     vr15,     vr15
102*c0909341SAndroid Build Coastguard Worker    vxor.v        vr14,     vr14,     vr14
103*c0909341SAndroid Build Coastguard Worker
    // Accumulate the seven taps. The macro clobbers vr12/vr13; vr13 (source
    // bytes 16..31) is no longer needed once the shuffles above are done.
104*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU    vr5,   vr21,  vr14,  vr15
105*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU    vr6,   vr22,  vr14,  vr15
106*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU    vr7,   vr23,  vr14,  vr15
107*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU    vr8,   vr24,  vr14,  vr15
108*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU    vr9,   vr25,  vr14,  vr15
109*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU    vr10,  vr26,  vr14,  vr15
110*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU    vr11,  vr27,  vr14,  vr15
111*c0909341SAndroid Build Coastguard Worker
    // Sign-extend the 16-bit tap sums to 32 bits, then add the centre term
    // and the 1 << 14 bias.
112*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr5,      vr14,     0   //  0  1  2  3
113*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr6,      vr14          //  4  5  6  7
114*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr7,      vr15,     0   //  8  9 10 11
115*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr8,      vr15          // 12 13 14 15
116*c0909341SAndroid Build Coastguard Worker    vadd.w        vr17,     vr17,     vr5
117*c0909341SAndroid Build Coastguard Worker    vadd.w        vr18,     vr18,     vr6
118*c0909341SAndroid Build Coastguard Worker    vadd.w        vr19,     vr19,     vr7
119*c0909341SAndroid Build Coastguard Worker    vadd.w        vr20,     vr20,     vr8
120*c0909341SAndroid Build Coastguard Worker    vadd.w        vr17,     vr17,     vr0
121*c0909341SAndroid Build Coastguard Worker    vadd.w        vr18,     vr18,     vr0
122*c0909341SAndroid Build Coastguard Worker    vadd.w        vr19,     vr19,     vr0
123*c0909341SAndroid Build Coastguard Worker    vadd.w        vr20,     vr20,     vr0
124*c0909341SAndroid Build Coastguard Worker
    // vr1 = (1 << 13) - 1 (upper clip bound), vr3 = 0 (lower clip bound).
    // NOTE(review): vr0, vr1 and vr3 are not written anywhere else in the
    // loops, so these three instructions are loop-invariant and could be
    // hoisted above the row loop.
125*c0909341SAndroid Build Coastguard Worker    vsrli.w       vr1,      vr0,      1
126*c0909341SAndroid Build Coastguard Worker    vsubi.wu      vr1,      vr1,      1
127*c0909341SAndroid Build Coastguard Worker    vxor.v        vr3,      vr3,      vr3
    // Rounding arithmetic shift right by 3.
128*c0909341SAndroid Build Coastguard Worker    vsrari.w      vr17,     vr17,     3
129*c0909341SAndroid Build Coastguard Worker    vsrari.w      vr18,     vr18,     3
130*c0909341SAndroid Build Coastguard Worker    vsrari.w      vr19,     vr19,     3
131*c0909341SAndroid Build Coastguard Worker    vsrari.w      vr20,     vr20,     3
    // vclip.w is not defined in this file; presumably a helper macro from
    // loongson_asm.S that clamps to [vr3, vr1] - TODO confirm.
132*c0909341SAndroid Build Coastguard Worker    vclip.w       vr17,     vr17,     vr3,     vr1
133*c0909341SAndroid Build Coastguard Worker    vclip.w       vr18,     vr18,     vr3,     vr1
134*c0909341SAndroid Build Coastguard Worker    vclip.w       vr19,     vr19,     vr3,     vr1
135*c0909341SAndroid Build Coastguard Worker    vclip.w       vr20,     vr20,     vr3,     vr1
136*c0909341SAndroid Build Coastguard Worker
    // Store 16 int32_t results and step both pointers by 16 elements.
137*c0909341SAndroid Build Coastguard Worker    vst           vr17,     t2,       0
138*c0909341SAndroid Build Coastguard Worker    vst           vr18,     t2,       16
139*c0909341SAndroid Build Coastguard Worker    vst           vr19,     t2,       32
140*c0909341SAndroid Build Coastguard Worker    vst           vr20,     t2,       48
141*c0909341SAndroid Build Coastguard Worker    addi.d        t1,       t1,       16
142*c0909341SAndroid Build Coastguard Worker    addi.d        t2,       t2,       64
143*c0909341SAndroid Build Coastguard Worker    blt           zero,     t0,       .WIENER_FILTER_H_W   // while columns remain
144*c0909341SAndroid Build Coastguard Worker
    // Advance to the next row: bytes for the input, int32_t for the output.
145*c0909341SAndroid Build Coastguard Worker    addi.d        a1,       a1,       REST_UNIT_STRIDE
146*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,       (REST_UNIT_STRIDE << 2)
147*c0909341SAndroid Build Coastguard Worker    bnez          a4,       .WIENER_FILTER_H_H
148*c0909341SAndroid Build Coastguard Worker
    // Restore the spilled registers and release the stack frame. No explicit
    // return instruction is visible; presumably endfunc emits it - TODO
    // confirm against loongson_asm.S.
149*c0909341SAndroid Build Coastguard Worker    fld.d         f24,      sp,       0
150*c0909341SAndroid Build Coastguard Worker    fld.d         f25,      sp,       8
151*c0909341SAndroid Build Coastguard Worker    fld.d         f26,      sp,       16
152*c0909341SAndroid Build Coastguard Worker    fld.d         f27,      sp,       24
153*c0909341SAndroid Build Coastguard Worker    fld.d         f28,      sp,       32
154*c0909341SAndroid Build Coastguard Worker    addi.d        sp,       sp,       40
155*c0909341SAndroid Build Coastguard Workerendfunc
156*c0909341SAndroid Build Coastguard Worker
// APPLY_FILTER in0, in1, in2
//   t7 = (in0 << 2) + in1, then loads 16 consecutive 32-bit values from t7
//   and accumulates them, scaled by the word lanes of in2, into vr14..vr17:
//     vr14..vr17 += load(t7 .. t7 + 63) * in2
//   Clobbers t7 and vr10-vr13. Because t7 is both written here and passed
//   back in as in1 by the caller, consecutive invocations walk down rows.
157*c0909341SAndroid Build Coastguard Worker.macro APPLY_FILTER in0, in1, in2
158*c0909341SAndroid Build Coastguard Worker    alsl.d         t7,      \in0,     \in1,    2
159*c0909341SAndroid Build Coastguard Worker    vld            vr10,    t7,       0
160*c0909341SAndroid Build Coastguard Worker    vld            vr11,    t7,       16
161*c0909341SAndroid Build Coastguard Worker    vld            vr12,    t7,       32
162*c0909341SAndroid Build Coastguard Worker    vld            vr13,    t7,       48
163*c0909341SAndroid Build Coastguard Worker    vmadd.w        vr14,    vr10,     \in2
164*c0909341SAndroid Build Coastguard Worker    vmadd.w        vr15,    vr11,     \in2
165*c0909341SAndroid Build Coastguard Worker    vmadd.w        vr16,    vr12,     \in2
166*c0909341SAndroid Build Coastguard Worker    vmadd.w        vr17,    vr13,     \in2
167*c0909341SAndroid Build Coastguard Worker.endm
168*c0909341SAndroid Build Coastguard Worker
// wiener_filter_v_8bpc_core_lsx: produces 16 vertically filtered pixels.
//
// Inputs, as set up by wiener_filter_v_8bpc_lsx:
//   a2 = hor (32-bit rows), t2 = row index j, t4 = column index i,
//   t8 = REST_UNIT_STRIDE, t6 = initial accumulator value,
//   vr2..vr8 = filter taps 0..6 broadcast to word lanes.
// Output: vr17 holds the 16 result bytes.
// Clobbers t7 and vr10-vr17.
169*c0909341SAndroid Build Coastguard Worker.macro wiener_filter_v_8bpc_core_lsx
    // Seed all 16 word accumulators with the constant in t6.
170*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr14,     t6
171*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr15,     t6
172*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr16,     t6
173*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr17,     t6
174*c0909341SAndroid Build Coastguard Worker
    // t7 = element index of the first input value (32-bit arithmetic).
175*c0909341SAndroid Build Coastguard Worker    addi.w        t7,       t2,       0      // j + index k
176*c0909341SAndroid Build Coastguard Worker    mul.w         t7,       t7,       t8     // (j + index) * REST_UNIT_STRIDE
177*c0909341SAndroid Build Coastguard Worker    add.w         t7,       t7,       t4     // (j + index) * REST_UNIT_STRIDE + i
178*c0909341SAndroid Build Coastguard Worker
    // The first call turns the element index into an address (a2 + 4 * t7);
    // each following call adds 4 * t8 bytes, i.e. steps one row down, so
    // rows j .. j+6 are combined with taps 0 .. 6.
179*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER  t7, a2, vr2
180*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER  t8, t7, vr3
181*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER  t8, t7, vr4
182*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER  t8, t7, vr5
183*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER  t8, t7, vr6
184*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER  t8, t7, vr7
185*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER  t8, t7, vr8
    // Rounding shift right by 11 with unsigned saturation to 16 bits, packing
    // two word vectors into one halfword vector each; then a saturating
    // narrow (shift 0) to unsigned bytes, leaving all 16 pixels in vr17.
186*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr15,     vr14,     11
187*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr17,     vr16,     11
188*c0909341SAndroid Build Coastguard Worker    vssrlni.bu.h  vr17,     vr15,     0
189*c0909341SAndroid Build Coastguard Worker.endm
190*c0909341SAndroid Build Coastguard Worker
191*c0909341SAndroid Build Coastguard Worker/*
192*c0909341SAndroid Build Coastguard Workervoid wiener_filter_v_lsx(uint8_t *p,
193*c0909341SAndroid Build Coastguard Worker                         const ptrdiff_t p_stride,
194*c0909341SAndroid Build Coastguard Worker                         const int32_t *hor,
195*c0909341SAndroid Build Coastguard Worker                         const int16_t filterv[8],
196*c0909341SAndroid Build Coastguard Worker                         const int w, const int h)
197*c0909341SAndroid Build Coastguard Worker*/
// wiener_filter_v_8bpc_lsx: vertical 7-tap pass of the Wiener filter (8 bpc).
//
// Register arguments, following the prototype comment above:
//   a0 = p, a1 = p_stride, a2 = hor, a3 = filterv, a4 = w, a5 = h
//
// For each row j in [0, h) the core macro combines rows j .. j+6 of hor with
// filterv[0..6] for 16 columns at a time, and the resulting bytes are written
// to p + j * p_stride + i. Three store paths exist:
//   - full groups of 16 columns use one 16-byte vector store,
//   - a trailing group of w % 16 columns is stored one byte at a time,
//   - when w < 16 a separate row loop stores w bytes per row.
// The core macro always loads 16 input values per row, also for partial
// groups, so hor must be readable that far.
// NOTE(review): offsets into p are computed with 32-bit mul.w/add.w, so only
// the low 32 bits of p_stride take part in the product.
// NOTE(review): the byte-store loops test their counter after decrementing it,
// so the w < 16 path assumes w > 0.
198*c0909341SAndroid Build Coastguard Workerfunction wiener_filter_v_8bpc_lsx
199*c0909341SAndroid Build Coastguard Worker    li.w          t6,       -(1 << 18)      // initial value of every accumulator
200*c0909341SAndroid Build Coastguard Worker
201*c0909341SAndroid Build Coastguard Worker    li.w          t8,       REST_UNIT_STRIDE
    // Load the seven 16-bit taps (ld.h sign-extends) and broadcast each one
    // to the word lanes of vr2..vr8.
202*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       0
203*c0909341SAndroid Build Coastguard Worker    ld.h          t1,       a3,       2
204*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr2,      t0
205*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr3,      t1
206*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       4
207*c0909341SAndroid Build Coastguard Worker    ld.h          t1,       a3,       6
208*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr4,      t0
209*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr5,      t1
210*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       8
211*c0909341SAndroid Build Coastguard Worker    ld.h          t1,       a3,       10
212*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr6,      t0
213*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr7,      t1
214*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       12
215*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr8,      t0
216*c0909341SAndroid Build Coastguard Worker
217*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf   // w % 16
218*c0909341SAndroid Build Coastguard Worker    sub.w         t0,       a4,       t1    // w-w%16
219*c0909341SAndroid Build Coastguard Worker    or            t2,       zero,     zero  // j
220*c0909341SAndroid Build Coastguard Worker    or            t4,       zero,     zero  // i = 0; stays 0 on the w < 16 path
221*c0909341SAndroid Build Coastguard Worker    beqz          t0,       .WIENER_FILTER_V_W_LT16
222*c0909341SAndroid Build Coastguard Worker
    // Row loop for w >= 16.
223*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_H:
224*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf    // reload w % 16, consumed by the tail loop
    // NOTE(review): t3 is recomputed before its first use in the column loop,
    // so this initialisation looks redundant.
225*c0909341SAndroid Build Coastguard Worker    add.d         t3,       zero,     a0     // p
226*c0909341SAndroid Build Coastguard Worker    or            t4,       zero,     zero   // i
227*c0909341SAndroid Build Coastguard Worker
    // Column loop over full groups of 16 pixels.
228*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W:
229*c0909341SAndroid Build Coastguard Worker
230*c0909341SAndroid Build Coastguard Worker    wiener_filter_v_8bpc_core_lsx
231*c0909341SAndroid Build Coastguard Worker
232*c0909341SAndroid Build Coastguard Worker    mul.w         t5,       t2,       a1   // j * stride
233*c0909341SAndroid Build Coastguard Worker    add.w         t5,       t5,       t4   // j * stride + i
234*c0909341SAndroid Build Coastguard Worker    add.d         t3,       a0,       t5
235*c0909341SAndroid Build Coastguard Worker    addi.w        t4,       t4,       16
236*c0909341SAndroid Build Coastguard Worker    vst           vr17,     t3,       0
237*c0909341SAndroid Build Coastguard Worker    bne           t0,       t4,       .WIENER_FILTER_V_W
238*c0909341SAndroid Build Coastguard Worker
    // No partial group in this row: go straight to the next row.
239*c0909341SAndroid Build Coastguard Worker    beqz          t1,       .WIENER_FILTER_V_W_EQ16
240*c0909341SAndroid Build Coastguard Worker
    // Partial group: t4 now equals w - w % 16, so the core macro filters the
    // columns right after the last full group.
241*c0909341SAndroid Build Coastguard Worker    wiener_filter_v_8bpc_core_lsx
242*c0909341SAndroid Build Coastguard Worker
243*c0909341SAndroid Build Coastguard Worker    addi.d        t3,       t3,       16     // just past the last full store
244*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf
245*c0909341SAndroid Build Coastguard Worker
    // Store w % 16 bytes: write byte 0, then shift the vector down one byte.
246*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_ST_REM:
247*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr17,     t3,       0,    0
248*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr17,     vr17,     1
249*c0909341SAndroid Build Coastguard Worker    addi.d        t3,       t3,       1
250*c0909341SAndroid Build Coastguard Worker    addi.w        t1,       t1,       -1
251*c0909341SAndroid Build Coastguard Worker    bnez          t1,       .WIENER_FILTER_V_ST_REM
252*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W_EQ16:
253*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       t2,       1
254*c0909341SAndroid Build Coastguard Worker    blt           t2,       a5,       .WIENER_FILTER_V_H   // while j < h
255*c0909341SAndroid Build Coastguard Worker    b              .WIENER_FILTER_V_END
256*c0909341SAndroid Build Coastguard Worker
    // Row loop for w < 16: one partial group per row, column index t4 is 0.
257*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W_LT16:
258*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf
    // NOTE(review): t3 is overwritten after the core macro below, so this
    // initialisation looks redundant.
259*c0909341SAndroid Build Coastguard Worker    add.d         t3,       zero,     a0
260*c0909341SAndroid Build Coastguard Worker
261*c0909341SAndroid Build Coastguard Worker    wiener_filter_v_8bpc_core_lsx
262*c0909341SAndroid Build Coastguard Worker
263*c0909341SAndroid Build Coastguard Worker    mul.w         t5,       t2,       a1   // j * stride
264*c0909341SAndroid Build Coastguard Worker    add.d         t3,       a0,       t5
265*c0909341SAndroid Build Coastguard Worker
    // Store w bytes, one at a time, same scheme as above.
266*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_ST_REM_1:
267*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr17,     t3,       0,    0
268*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr17,     vr17,     1
269*c0909341SAndroid Build Coastguard Worker    addi.d        t3,       t3,       1
270*c0909341SAndroid Build Coastguard Worker    addi.w        t1,       t1,       -1
271*c0909341SAndroid Build Coastguard Worker    bnez          t1,       .WIENER_FILTER_V_ST_REM_1
272*c0909341SAndroid Build Coastguard Worker
273*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       t2,       1
274*c0909341SAndroid Build Coastguard Worker    blt           t2,       a5,       .WIENER_FILTER_V_W_LT16
275*c0909341SAndroid Build Coastguard Worker
    // No explicit return instruction is visible; presumably endfunc emits
    // it - TODO confirm against loongson_asm.S.
276*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_END:
277*c0909341SAndroid Build Coastguard Workerendfunc
278*c0909341SAndroid Build Coastguard Worker
279*c0909341SAndroid Build Coastguard Worker/*
280*c0909341SAndroid Build Coastguard Workervoid boxsum3_h(int32_t *sumsq, coef *sum, const pixel *src,
281*c0909341SAndroid Build Coastguard Worker               const int w, const int h)
282*c0909341SAndroid Build Coastguard Worker*/
// boxsum3_h_8bpc_lsx: 3-row box sums and sums of squares (8 bpc).
//
// Register arguments, following the prototype comment above:
//   a0 = sumsq (32-bit elements), a1 = sum (16-bit elements),
//   a2 = src (bytes), a3 = w, a4 = h
//
// NOTE(review): despite the _h suffix, the visible code adds three vertically
// adjacent source rows (offsets 0, REST_UNIT_STRIDE and 2 * REST_UNIT_STRIDE).
//
// The first source row is skipped and processing starts at column 1. For each
// of the h - 4 row steps and every group of 16 columns:
//   sum   = r0 + r1 + r2              (16-bit, stored one row below a1)
//   sumsq = r0*r0 + r1*r1 + r2*r2     (32-bit, stored one row below a0)
// Both loops test their counter at the bottom, so at least one row and one
// 16-column group is processed, and every group reads 16 bytes per row and
// writes 32 + 64 bytes regardless of how many columns remain.
283*c0909341SAndroid Build Coastguard Workerfunction boxsum3_h_8bpc_lsx
284*c0909341SAndroid Build Coastguard Worker    addi.d         a2,      a2,      REST_UNIT_STRIDE   // skip the first source row
285*c0909341SAndroid Build Coastguard Worker    li.w           t0,      1                           // starting column x
286*c0909341SAndroid Build Coastguard Worker    addi.w         a3,      a3,      -2                 // column budget: w - 2
287*c0909341SAndroid Build Coastguard Worker    addi.w         a4,      a4,      -4                 // row budget: h - 4
288*c0909341SAndroid Build Coastguard Worker
    // Row loop: derive the per-row pointers at column x from the row bases.
289*c0909341SAndroid Build Coastguard Worker.LBS3_H_H:
290*c0909341SAndroid Build Coastguard Worker    alsl.d         t1,      t0,      a1,    1     // sum_v    *sum_v = sum + x
291*c0909341SAndroid Build Coastguard Worker    alsl.d         t2,      t0,      a0,    2     // sumsq_v  *sumsq_v = sumsq + x
292*c0909341SAndroid Build Coastguard Worker    add.d          t3,      t0,      a2           // s
293*c0909341SAndroid Build Coastguard Worker    addi.w         t5,      a3,      0
    // Column loop: 16 columns per iteration.
294*c0909341SAndroid Build Coastguard Worker.LBS3_H_W:
295*c0909341SAndroid Build Coastguard Worker    vld            vr0,     t3,      0                      // row r0
296*c0909341SAndroid Build Coastguard Worker    vld            vr1,     t3,      REST_UNIT_STRIDE       // row r1
297*c0909341SAndroid Build Coastguard Worker    vld            vr2,     t3,      (REST_UNIT_STRIDE<<1)  // row r2
298*c0909341SAndroid Build Coastguard Worker
    // Interleave r0/r1 bytes so that each halfword holds one (r0, r1) pair;
    // the horizontal widening add then yields r0 + r1 per column.
    // r2 is zero-extended to halfwords separately (vr7 low, vr8 high).
299*c0909341SAndroid Build Coastguard Worker    vilvl.b        vr3,     vr1,     vr0
300*c0909341SAndroid Build Coastguard Worker    vhaddw.hu.bu   vr4,     vr3,     vr3
301*c0909341SAndroid Build Coastguard Worker    vilvh.b        vr5,     vr1,     vr0
302*c0909341SAndroid Build Coastguard Worker    vhaddw.hu.bu   vr6,     vr5,     vr5
303*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu  vr7,     vr2,     0
304*c0909341SAndroid Build Coastguard Worker    vexth.hu.bu    vr8,     vr2
305*c0909341SAndroid Build Coastguard Worker    // sum_v
306*c0909341SAndroid Build Coastguard Worker    vadd.h         vr4,     vr4,     vr7
307*c0909341SAndroid Build Coastguard Worker    vadd.h         vr6,     vr6,     vr8
    // Stored one 16-bit row below the current sum pointer.
308*c0909341SAndroid Build Coastguard Worker    vst            vr4,     t1,      REST_UNIT_STRIDE<<1
309*c0909341SAndroid Build Coastguard Worker    vst            vr6,     t1,      (REST_UNIT_STRIDE<<1)+16
310*c0909341SAndroid Build Coastguard Worker    addi.d         t1,      t1,      32
311*c0909341SAndroid Build Coastguard Worker    // sumsq
    // Even bytes of vr3/vr5 are r0 and odd bytes are r1, so the even/odd
    // widening multiplies give r0*r0 and r1*r1 as halfwords.
312*c0909341SAndroid Build Coastguard Worker    vmulwev.h.bu   vr9,     vr3,     vr3
313*c0909341SAndroid Build Coastguard Worker    vmulwod.h.bu   vr10,    vr3,     vr3
314*c0909341SAndroid Build Coastguard Worker    vmulwev.h.bu   vr11,    vr5,     vr5
315*c0909341SAndroid Build Coastguard Worker    vmulwod.h.bu   vr12,    vr5,     vr5
    // Widen to 32 bits while adding r0*r0 + r1*r1; even and odd columns end
    // up in separate vectors.
316*c0909341SAndroid Build Coastguard Worker    vaddwev.w.hu   vr13,    vr10,    vr9
317*c0909341SAndroid Build Coastguard Worker    vaddwod.w.hu   vr14,    vr10,    vr9
318*c0909341SAndroid Build Coastguard Worker    vaddwev.w.hu   vr15,    vr12,    vr11
319*c0909341SAndroid Build Coastguard Worker    vaddwod.w.hu   vr16,    vr12,    vr11
    // Add r2*r2 for the even and odd columns respectively.
320*c0909341SAndroid Build Coastguard Worker    vmaddwev.w.hu  vr13,    vr7,     vr7
321*c0909341SAndroid Build Coastguard Worker    vmaddwod.w.hu  vr14,    vr7,     vr7
322*c0909341SAndroid Build Coastguard Worker    vmaddwev.w.hu  vr15,    vr8,     vr8
323*c0909341SAndroid Build Coastguard Worker    vmaddwod.w.hu  vr16,    vr8,     vr8
    // Re-interleave even/odd columns back into natural column order.
324*c0909341SAndroid Build Coastguard Worker    vilvl.w        vr9,     vr14,    vr13
325*c0909341SAndroid Build Coastguard Worker    vilvh.w        vr10,    vr14,    vr13
326*c0909341SAndroid Build Coastguard Worker    vilvl.w        vr11,    vr16,    vr15
327*c0909341SAndroid Build Coastguard Worker    vilvh.w        vr12,    vr16,    vr15
    // Stored one 32-bit row below the current sumsq pointer.
328*c0909341SAndroid Build Coastguard Worker    vst            vr9,     t2,      REST_UNIT_STRIDE<<2
329*c0909341SAndroid Build Coastguard Worker    vst            vr10,    t2,      (REST_UNIT_STRIDE<<2)+16
330*c0909341SAndroid Build Coastguard Worker    vst            vr11,    t2,      (REST_UNIT_STRIDE<<2)+32
331*c0909341SAndroid Build Coastguard Worker    vst            vr12,    t2,      (REST_UNIT_STRIDE<<2)+48
332*c0909341SAndroid Build Coastguard Worker
333*c0909341SAndroid Build Coastguard Worker    addi.d         t2,      t2,      64
334*c0909341SAndroid Build Coastguard Worker    addi.w         t5,      t5,      -16
335*c0909341SAndroid Build Coastguard Worker    addi.d         t3,      t3,      16
336*c0909341SAndroid Build Coastguard Worker    blt            zero,    t5,      .LBS3_H_W     // while columns remain
337*c0909341SAndroid Build Coastguard Worker
    // Step all three row bases down by one row and loop while rows remain.
338*c0909341SAndroid Build Coastguard Worker    addi.d         a0,      a0,      REST_UNIT_STRIDE<<2
339*c0909341SAndroid Build Coastguard Worker    addi.d         a1,      a1,      REST_UNIT_STRIDE<<1
340*c0909341SAndroid Build Coastguard Worker    addi.d         a2,      a2,      REST_UNIT_STRIDE
341*c0909341SAndroid Build Coastguard Worker    addi.d         a4,      a4,      -1
342*c0909341SAndroid Build Coastguard Worker    blt            zero,    a4,      .LBS3_H_H
343*c0909341SAndroid Build Coastguard Workerendfunc
344*c0909341SAndroid Build Coastguard Worker
345*c0909341SAndroid Build Coastguard Worker/*
346*c0909341SAndroid Build Coastguard Workervoid boxsum3_v(int32_t *sumsq, coef *sum,
347*c0909341SAndroid Build Coastguard Worker               const int w, const int h)
348*c0909341SAndroid Build Coastguard Worker*/
349*c0909341SAndroid Build Coastguard Workerfunction boxsum3_v_8bpc_lsx
350*c0909341SAndroid Build Coastguard Worker    addi.d         a0,      a0,      (REST_UNIT_STRIDE<<2)
351*c0909341SAndroid Build Coastguard Worker    addi.d         a1,      a1,      (REST_UNIT_STRIDE<<1)
352*c0909341SAndroid Build Coastguard Worker    addi.w         a3,      a3,      -4
353*c0909341SAndroid Build Coastguard Worker    addi.w         a2,      a2,      -4
354*c0909341SAndroid Build Coastguard Worker
355*c0909341SAndroid Build Coastguard Worker.LBS3_V_H:
356*c0909341SAndroid Build Coastguard Worker    sub.w          t3,      a2,      zero
357*c0909341SAndroid Build Coastguard Worker    addi.d         t0,      a0,      4
358*c0909341SAndroid Build Coastguard Worker    addi.d         t1,      a1,      2
359*c0909341SAndroid Build Coastguard Worker    addi.d         t5,      a0,      8
360*c0909341SAndroid Build Coastguard Worker    addi.d         t6,      a1,      4
361*c0909341SAndroid Build Coastguard Worker
362*c0909341SAndroid Build Coastguard Worker    vld            vr0,      t1,      0   // a 0 1 2 3 4 5 6 7
363*c0909341SAndroid Build Coastguard Worker    vld            vr1,      t1,      2   // b 1 2 3 4 5 6 7 8
364*c0909341SAndroid Build Coastguard Worker    vld            vr2,      t1,      4   // c 2 3 4 5 6 7 8 9
365*c0909341SAndroid Build Coastguard Worker    vld            vr3,      t0,      0   // a2 0 1 2 3
366*c0909341SAndroid Build Coastguard Worker    vld            vr4,      t0,      4   // b2 1 2 3 4
367*c0909341SAndroid Build Coastguard Worker    vld            vr5,      t0,      8   // c2 2 3 4 5
368*c0909341SAndroid Build Coastguard Worker    vld            vr6,      t0,      16  //    3 4 5 6
369*c0909341SAndroid Build Coastguard Worker    vld            vr7,      t0,      20  //    4 5 6 7
370*c0909341SAndroid Build Coastguard Worker    vld            vr8,      t0,      24  //    5 6 7 8
371*c0909341SAndroid Build Coastguard Worker    vadd.h         vr9,      vr0,     vr1
372*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,     vr3,     vr4
373*c0909341SAndroid Build Coastguard Worker    vadd.w         vr11,     vr6,     vr7
374*c0909341SAndroid Build Coastguard Worker    vadd.h         vr9,      vr9,     vr2
375*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,     vr10,    vr5
376*c0909341SAndroid Build Coastguard Worker    vadd.w         vr11,     vr11,    vr8
377*c0909341SAndroid Build Coastguard Worker    vpickve2gr.h   t7,       vr2,     6
378*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t8,       vr8,     2
379*c0909341SAndroid Build Coastguard Worker    vst            vr9,      t6,      0
380*c0909341SAndroid Build Coastguard Worker    vst            vr10,     t5,      0
381*c0909341SAndroid Build Coastguard Worker    vst            vr11,     t5,      16
382*c0909341SAndroid Build Coastguard Worker
383*c0909341SAndroid Build Coastguard Worker    addi.d         t1,       t1,      16
384*c0909341SAndroid Build Coastguard Worker    addi.d         t0,       t0,      32
385*c0909341SAndroid Build Coastguard Worker    addi.d         t5,       t5,      32
386*c0909341SAndroid Build Coastguard Worker    addi.d         t6,       t6,      16
387*c0909341SAndroid Build Coastguard Worker    addi.d         t3,       t3,      -8
388*c0909341SAndroid Build Coastguard Worker    ble            t3,       zero,    .LBS3_V_H0
389*c0909341SAndroid Build Coastguard Worker
390*c0909341SAndroid Build Coastguard Worker.LBS3_V_W8:
391*c0909341SAndroid Build Coastguard Worker    vld            vr0,      t1,      0   // a 0 1 2 3 4 5 6 7
392*c0909341SAndroid Build Coastguard Worker    vld            vr1,      t1,      2   // b 1 2 3 4 5 6 7 8
393*c0909341SAndroid Build Coastguard Worker    vld            vr2,      t1,      4   // c 2 3 4 5 6 7 8 9
394*c0909341SAndroid Build Coastguard Worker    vld            vr3,      t0,      0   // a2 0 1 2 3
395*c0909341SAndroid Build Coastguard Worker    vld            vr4,      t0,      4   // b2 1 2 3 4
396*c0909341SAndroid Build Coastguard Worker    vld            vr5,      t0,      8   // c2 2 3 4 5
397*c0909341SAndroid Build Coastguard Worker    vld            vr6,      t0,      16  //    3 4 5 6
398*c0909341SAndroid Build Coastguard Worker    vld            vr7,      t0,      20  //    4 5 6 7
399*c0909341SAndroid Build Coastguard Worker    vld            vr8,      t0,      24  //    5 6 7 8
400*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.h    vr0,      t7,      0
401*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr3,      t8,      0
402*c0909341SAndroid Build Coastguard Worker    vpickve2gr.h   t7,       vr2,     6
403*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t8,       vr8,     2
404*c0909341SAndroid Build Coastguard Worker    vadd.h         vr9,      vr0,     vr1
405*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,     vr3,     vr4
406*c0909341SAndroid Build Coastguard Worker    vadd.w         vr11,     vr6,     vr7
407*c0909341SAndroid Build Coastguard Worker    vadd.h         vr9,      vr9,     vr2
408*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,     vr10,    vr5
409*c0909341SAndroid Build Coastguard Worker    vadd.w         vr11,     vr11,    vr8
410*c0909341SAndroid Build Coastguard Worker    vst            vr9,      t6,      0
411*c0909341SAndroid Build Coastguard Worker    vst            vr10,     t5,      0
412*c0909341SAndroid Build Coastguard Worker    vst            vr11,     t5,      16
413*c0909341SAndroid Build Coastguard Worker    addi.d         t3,       t3,      -8
414*c0909341SAndroid Build Coastguard Worker    addi.d         t1,       t1,      16
415*c0909341SAndroid Build Coastguard Worker    addi.d         t0,       t0,      32
416*c0909341SAndroid Build Coastguard Worker    addi.d         t5,       t5,      32
417*c0909341SAndroid Build Coastguard Worker    addi.d         t6,       t6,      16
418*c0909341SAndroid Build Coastguard Worker    blt            zero,     t3,      .LBS3_V_W8
419*c0909341SAndroid Build Coastguard Worker
420*c0909341SAndroid Build Coastguard Worker.LBS3_V_H0:
421*c0909341SAndroid Build Coastguard Worker    addi.d         a1,       a1,      REST_UNIT_STRIDE<<1
422*c0909341SAndroid Build Coastguard Worker    addi.d         a0,       a0,      REST_UNIT_STRIDE<<2
423*c0909341SAndroid Build Coastguard Worker    addi.w         a3,       a3,      -1
424*c0909341SAndroid Build Coastguard Worker    bnez           a3,       .LBS3_V_H
425*c0909341SAndroid Build Coastguard Workerendfunc
426*c0909341SAndroid Build Coastguard Worker
427*c0909341SAndroid Build Coastguard Worker/*
428*c0909341SAndroid Build Coastguard Workerboxsum3_selfguided_filter(int32_t *sumsq, coef *sum,
429*c0909341SAndroid Build Coastguard Worker                          const int w, const int h,
430*c0909341SAndroid Build Coastguard Worker                          const unsigned s)
431*c0909341SAndroid Build Coastguard Worker*/
// boxsum3_sgf_h_8bpc_lsx: in-place conversion of the two 3x3 box-sum planes
// into self-guided-filter coefficients (LSX, 128-bit vectors).
//
// Arguments (names taken from the prototype comment above):
//   a0 = sumsq (32-bit entries), a1 = sum (16-bit entries),
//   a2 = w, a3 = h, a4 = s
//
// For (h + 2) rows, walking (w + 2) entries in groups of 8, every pair
// a = sumsq[i], b = sum[i] is replaced by:
//   p        = max(a * n - b * b, 0)
//   z        = min((p * s + (1 << 19)) >> 20, 255)
//   x        = dav1d_sgr_x_by_x[z]
//   sumsq[i] = (x * b * 455 + (1 << 11)) >> 12
//   sum[i]   = 256 - x
// n is the per-word constant produced by "vldi vr19, 0x809" (presumably 9,
// the 3x3 box area -- confirm against the LSX vldi immediate encoding).
// The inner loop only works on whole groups of 8, so up to 7 entries beyond
// w + 2 are read and rewritten on every row.
432*c0909341SAndroid Build Coastguard Workerfunction boxsum3_sgf_h_8bpc_lsx
433*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,        REST_UNIT_STRIDE<<2   // skip one row of 32-bit entries
434*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,        12   // AA = sumsq + REST_UNIT_STRIDE + 3 entries
435*c0909341SAndroid Build Coastguard Worker    addi.d        a1,       a1,        REST_UNIT_STRIDE<<1   // skip one row of 16-bit entries
436*c0909341SAndroid Build Coastguard Worker    addi.d        a1,       a1,        6    // BB = sum + REST_UNIT_STRIDE + 3 entries
437*c0909341SAndroid Build Coastguard Worker    la.local      t8,       dav1d_sgr_x_by_x   // t8 = base of the byte lookup table
438*c0909341SAndroid Build Coastguard Worker    li.w          t6,       455
439*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr20,     t6                 // vr20 = 455 in every word (sgr_one_by_x)
440*c0909341SAndroid Build Coastguard Worker    li.w          t6,       255
441*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr22,     t6                 // vr22 = 255 in every word (upper clamp for z)
442*c0909341SAndroid Build Coastguard Worker    vaddi.wu      vr21,     vr22,      1  // 256
443*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr6,      a4                 // vr6 = s in every word
444*c0909341SAndroid Build Coastguard Worker    vldi          vr19,     0x809              // vr19 = n in every word (see header note)
445*c0909341SAndroid Build Coastguard Worker    addi.w        a2,       a2,        2  // w + 2
446*c0909341SAndroid Build Coastguard Worker    addi.w        a3,       a3,        2  // h + 2
447*c0909341SAndroid Build Coastguard Worker
448*c0909341SAndroid Build Coastguard Worker.LBS3SGF_H_H:
449*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       a2,        0       // t2 = entries left in this row
450*c0909341SAndroid Build Coastguard Worker    addi.d        t0,       a0,        -4      // t0 = &AA[-1] (one 32-bit entry before AA)
451*c0909341SAndroid Build Coastguard Worker    addi.d        t1,       a1,        -2      // t1 = &BB[-1] (one 16-bit entry before BB)
452*c0909341SAndroid Build Coastguard Worker
453*c0909341SAndroid Build Coastguard Worker.LBS3SGF_H_W:
454*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       t2,        -8      // this iteration consumes 8 entries
455*c0909341SAndroid Build Coastguard Worker    vld           vr0,      t0,        0   // AA[i], entries 0..3
456*c0909341SAndroid Build Coastguard Worker    vld           vr1,      t0,        16  // AA[i], entries 4..7
457*c0909341SAndroid Build Coastguard Worker    vld           vr2,      t1,        0   // BB[i], entries 0..7 (halfwords)
458*c0909341SAndroid Build Coastguard Worker
459*c0909341SAndroid Build Coastguard Worker    vmul.w        vr4,      vr0,       vr19 // a * n
460*c0909341SAndroid Build Coastguard Worker    vmul.w        vr5,      vr1,       vr19 // a * n
461*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr9,      vr2,       0    // b, entries 0..3 sign-extended to words
462*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr10,     vr2             // b, entries 4..7 sign-extended to words
463*c0909341SAndroid Build Coastguard Worker    vmsub.w       vr4,      vr9,       vr9   // p = a * n - b * b
464*c0909341SAndroid Build Coastguard Worker    vmsub.w       vr5,      vr10,      vr10   // p = a * n - b * b
465*c0909341SAndroid Build Coastguard Worker    vmaxi.w       vr4,      vr4,       0    // p = max(p, 0)
466*c0909341SAndroid Build Coastguard Worker    vmaxi.w       vr5,      vr5,       0    // p = max(p, 0)
467*c0909341SAndroid Build Coastguard Worker    vmul.w        vr4,      vr4,       vr6  // p * s
468*c0909341SAndroid Build Coastguard Worker    vmul.w        vr5,      vr5,       vr6  // p * s
469*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr4,      vr4,       20   // z = (p * s + (1 << 19)) >> 20
470*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr5,      vr5,       20   // z
471*c0909341SAndroid Build Coastguard Worker    vmin.w        vr4,      vr4,       vr22 // z = min(z, 255)
472*c0909341SAndroid Build Coastguard Worker    vmin.w        vr5,      vr5,       vr22 // z = min(z, 255)
473*c0909341SAndroid Build Coastguard Worker
474*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       0     // table lookup is done lane by lane via GPRs:
475*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6    //   t7 = dav1d_sgr_x_by_x[z]
476*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        0     //   vr7 lane 0 = x for entry 0
477*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       1
478*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
479*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        1     // x for entry 1
480*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       2
481*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
482*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        2     // x for entry 2
483*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       3
484*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
485*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        3     // x for entry 3
486*c0909341SAndroid Build Coastguard Worker
487*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       0
488*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
489*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        0     // x for entry 4
490*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       1
491*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
492*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        1     // x for entry 5
493*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       2
494*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
495*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        2     // x for entry 6
496*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       3
497*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
498*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        3     // x for entry 7
499*c0909341SAndroid Build Coastguard Worker
500*c0909341SAndroid Build Coastguard Worker    vmul.w        vr9,      vr7,       vr9   // x * BB[i]
501*c0909341SAndroid Build Coastguard Worker    vmul.w        vr10,     vr8,       vr10  // x * BB[i], entries 4..7
502*c0909341SAndroid Build Coastguard Worker    vmul.w        vr9,      vr9,       vr20  // x * BB[i] * sgr_one_by_x
503*c0909341SAndroid Build Coastguard Worker    vmul.w        vr10,     vr10,      vr20  // x * BB[i] * sgr_one_by_x, entries 4..7
504*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr9,      vr9,       12    // rounding shift: (v + (1 << 11)) >> 12
505*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr10,     vr10,      12
506*c0909341SAndroid Build Coastguard Worker    vsub.w        vr7,      vr21,      vr7   // 256 - x, entries 0..3
507*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,      vr21,      vr8   // 256 - x, entries 4..7
508*c0909341SAndroid Build Coastguard Worker    vpickev.h     vr8,      vr8,       vr7   // keep low halfword of each word: 8 x (256 - x)
509*c0909341SAndroid Build Coastguard Worker
510*c0909341SAndroid Build Coastguard Worker    vst           vr9,      t0,        0     // new AA, entries 0..3
511*c0909341SAndroid Build Coastguard Worker    vst           vr10,     t0,        16    // new AA, entries 4..7
512*c0909341SAndroid Build Coastguard Worker    vst           vr8,      t1,        0     // new BB, entries 0..7
513*c0909341SAndroid Build Coastguard Worker    addi.d        t0,       t0,        32    // advance 8 x 32-bit entries
514*c0909341SAndroid Build Coastguard Worker    addi.d        t1,       t1,        16    // advance 8 x 16-bit entries
515*c0909341SAndroid Build Coastguard Worker    blt           zero,     t2,        .LBS3SGF_H_W   // loop while entries remain (t2 > 0)
516*c0909341SAndroid Build Coastguard Worker
517*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,        REST_UNIT_STRIDE<<2   // next sumsq row
518*c0909341SAndroid Build Coastguard Worker    addi.d        a1,       a1,        REST_UNIT_STRIDE<<1   // next sum row
519*c0909341SAndroid Build Coastguard Worker    addi.w        a3,       a3,        -1
520*c0909341SAndroid Build Coastguard Worker    bnez          a3,       .LBS3SGF_H_H       // repeat for all h + 2 rows
521*c0909341SAndroid Build Coastguard Workerendfunc
522*c0909341SAndroid Build Coastguard Worker
523*c0909341SAndroid Build Coastguard Worker/*
524*c0909341SAndroid Build Coastguard Workerboxsum3_selfguided_filter(coef *dst, pixel *src,
525*c0909341SAndroid Build Coastguard Worker                  int32_t *sumsq, coef *sum,
526*c0909341SAndroid Build Coastguard Worker                  const int w, const int h)
527*c0909341SAndroid Build Coastguard Worker*/
// boxsum3_sgf_v_8bpc_lsx: final weighting pass of the 3x3 self-guided filter
// (LSX, 8 pixels per inner iteration).
//
// Arguments (names taken from the prototype comment above):
//   a0 = dst (16-bit), a1 = src (8-bit), a2 = sumsq (32-bit, "A"),
//   a3 = sum (16-bit, "B"), a4 = w, a5 = h
//
// With P being either plane and "stride" meaning REST_UNIT_STRIDE entries:
//   W(P, i) = 4 * (P[i] + P[i-1] + P[i+1] + P[i-stride] + P[i+stride])
//           + 3 * (P[i-1-stride] + P[i-1+stride] + P[i+1-stride] + P[i+1+stride])
//   a = W(sum, i),  b = W(sumsq, i)
//   dst[i] = (a * src[i] + b + (1 << 8)) >> 9, narrowed to 16 bits with saturation
//
// Rows: h iterations; dst advances 384 entries per row, the other three
// pointers advance REST_UNIT_STRIDE entries. Columns: groups of 8 while the
// remaining width is > 0, so a partial last group is still computed in full.
528*c0909341SAndroid Build Coastguard Workerfunction boxsum3_sgf_v_8bpc_lsx
529*c0909341SAndroid Build Coastguard Worker    addi.d        a1,        a1,      (3*REST_UNIT_STRIDE+3)   // src: skip 3 rows + 3 pixels
530*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,      REST_UNIT_STRIDE<<2      // sumsq: skip 2 rows + 3 entries, done as
531*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,      (REST_UNIT_STRIDE<<2)+12 // two adds (presumably to fit the addi immediate)
532*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,      REST_UNIT_STRIDE<<2      // sum: 2 rows of 16-bit entries
533*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,      6                        //      + 3 entries
534*c0909341SAndroid Build Coastguard Worker.LBS3SGF_V_H:
535*c0909341SAndroid Build Coastguard Worker    // A int32_t *sumsq
536*c0909341SAndroid Build Coastguard Worker    addi.d        t0,        a2,      -(REST_UNIT_STRIDE<<2)   // -stride
537*c0909341SAndroid Build Coastguard Worker    addi.d        t1,        a2,      0    // sumsq
538*c0909341SAndroid Build Coastguard Worker    addi.d        t2,        a2,      REST_UNIT_STRIDE<<2      // +stride
539*c0909341SAndroid Build Coastguard Worker    addi.d        t6,        a1,      0    // t6 = src cursor for this row
540*c0909341SAndroid Build Coastguard Worker    addi.w        t7,        a4,      0    // t7 = pixels left in this row
541*c0909341SAndroid Build Coastguard Worker    addi.d        t8,        a0,      0    // t8 = dst cursor for this row
542*c0909341SAndroid Build Coastguard Worker    // B coef *sum
543*c0909341SAndroid Build Coastguard Worker    addi.d        t3,        a3,      -(REST_UNIT_STRIDE<<1)   // -stride
544*c0909341SAndroid Build Coastguard Worker    addi.d        t4,        a3,      0    // sum, current row
545*c0909341SAndroid Build Coastguard Worker    addi.d        t5,        a3,      REST_UNIT_STRIDE<<1      // +stride
546*c0909341SAndroid Build Coastguard Worker
547*c0909341SAndroid Build Coastguard Worker.LBS3SGF_V_W:
548*c0909341SAndroid Build Coastguard Worker    vld           vr0,       t0,      0   // P[i - REST_UNIT_STRIDE]      0 1 2 3
549*c0909341SAndroid Build Coastguard Worker    vld           vr1,       t0,      16           //                     4 5 6 7
550*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t1,      -4  // P[i-1]  -1 0 1 2
551*c0909341SAndroid Build Coastguard Worker    vld           vr3,       t1,      12           // 3 4 5 6
552*c0909341SAndroid Build Coastguard Worker    vld           vr4,       t2,      0   // P[i + REST_UNIT_STRIDE]      0 1 2 3
553*c0909341SAndroid Build Coastguard Worker    vld           vr5,       t2,      16           //                     4 5 6 7
554*c0909341SAndroid Build Coastguard Worker    vld           vr6,       t1,      0   // p[i]     0 1 2 3
555*c0909341SAndroid Build Coastguard Worker    vld           vr7,       t1,      16           // 4 5 6 7
556*c0909341SAndroid Build Coastguard Worker    vld           vr8,       t1,      4   // p[i+1]   1 2 3 4
557*c0909341SAndroid Build Coastguard Worker    vld           vr9,       t1,      20           // 5 6 7 8
558*c0909341SAndroid Build Coastguard Worker
559*c0909341SAndroid Build Coastguard Worker    vld           vr10,      t0,      -4  // P[i - 1 - REST_UNIT_STRIDE]
560*c0909341SAndroid Build Coastguard Worker    vld           vr11,      t0,      12  //   same, second group of 4
561*c0909341SAndroid Build Coastguard Worker    vld           vr12,      t2,      -4  // P[i - 1 + REST_UNIT_STRIDE]
562*c0909341SAndroid Build Coastguard Worker    vld           vr13,      t2,      12  //   same, second group of 4
563*c0909341SAndroid Build Coastguard Worker    vld           vr14,      t0,      4   // P[i + 1 - REST_UNIT_STRIDE]
564*c0909341SAndroid Build Coastguard Worker    vld           vr15,      t0,      20  //   same, second group of 4
565*c0909341SAndroid Build Coastguard Worker    vld           vr16,      t2,      4   // P[i + 1 + REST_UNIT_STRIDE]
566*c0909341SAndroid Build Coastguard Worker    vld           vr17,      t2,      20  //   same, second group of 4
567*c0909341SAndroid Build Coastguard Worker
568*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr2,     vr0    // left + up
569*c0909341SAndroid Build Coastguard Worker    vadd.w        vr4,       vr6,     vr4    // centre + down
570*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr0,     vr8    // + right
571*c0909341SAndroid Build Coastguard Worker    vadd.w        vr20,      vr0,     vr4    // 5-entry cross sum
572*c0909341SAndroid Build Coastguard Worker    vslli.w       vr20,      vr20,    2      // cross * 4, pixels 0 1 2 3
573*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr1,     vr3    // up + left
574*c0909341SAndroid Build Coastguard Worker    vadd.w        vr4,       vr5,     vr7    // down + centre
575*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr0,     vr9    // + right
576*c0909341SAndroid Build Coastguard Worker    vadd.w        vr21,      vr0,     vr4    // 5-entry cross sum
577*c0909341SAndroid Build Coastguard Worker    vslli.w       vr21,      vr21,    2      // cross * 4, pixels 4 5 6 7
578*c0909341SAndroid Build Coastguard Worker    vadd.w        vr12,      vr10,    vr12   // up-left + down-left
579*c0909341SAndroid Build Coastguard Worker    vadd.w        vr16,      vr14,    vr16   // up-right + down-right
580*c0909341SAndroid Build Coastguard Worker    vadd.w        vr22,      vr12,    vr16   // sum of the 4 diagonals
581*c0909341SAndroid Build Coastguard Worker    vslli.w       vr23,      vr22,    1      // diag * 2
582*c0909341SAndroid Build Coastguard Worker    vadd.w        vr22,      vr23,    vr22   // diag * 3, pixels 0..3
583*c0909341SAndroid Build Coastguard Worker    vadd.w        vr11,      vr11,    vr13   // up-left + down-left
584*c0909341SAndroid Build Coastguard Worker    vadd.w        vr15,      vr15,    vr17   // up-right + down-right
585*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr11,    vr15   // sum of the 4 diagonals
586*c0909341SAndroid Build Coastguard Worker    vslli.w       vr23,      vr0,     1      // diag * 2
587*c0909341SAndroid Build Coastguard Worker    vadd.w        vr23,      vr23,    vr0    // diag * 3, pixels 4..7
588*c0909341SAndroid Build Coastguard Worker    vadd.w        vr20,      vr20,    vr22   // b = cross * 4 + diag * 3, pixels 0..3
589*c0909341SAndroid Build Coastguard Worker    vadd.w        vr21,      vr21,    vr23   // b, pixels 4..7
590*c0909341SAndroid Build Coastguard Worker
591*c0909341SAndroid Build Coastguard Worker    // B coef *sum
592*c0909341SAndroid Build Coastguard Worker    vld           vr0,       t3,      0   // P[i - REST_UNIT_STRIDE]
593*c0909341SAndroid Build Coastguard Worker    vld           vr1,       t4,      -2  // p[i - 1]
594*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t4,      0   // p[i]
595*c0909341SAndroid Build Coastguard Worker    vld           vr3,       t4,      2   // p[i + 1]
596*c0909341SAndroid Build Coastguard Worker    vld           vr4,       t5,      0   // P[i + REST_UNIT_STRIDE]
597*c0909341SAndroid Build Coastguard Worker    vld           vr5,       t3,      -2  // P[i - 1 - REST_UNIT_STRIDE]
598*c0909341SAndroid Build Coastguard Worker    vld           vr6,       t5,      -2  // P[i - 1 + REST_UNIT_STRIDE]
599*c0909341SAndroid Build Coastguard Worker    vld           vr7,       t3,      2   // P[i + 1 - REST_UNIT_STRIDE]
600*c0909341SAndroid Build Coastguard Worker    vld           vr8,       t5,      2   // P[i + 1 + REST_UNIT_STRIDE]
601*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr9,       vr0,     vr1    // up + left, even halfwords widened to words
602*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr10,      vr0,     vr1    // up + left, odd halfwords widened to words
603*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr11,      vr2,     vr3    // centre + right, even
604*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr12,      vr2,     vr3    // centre + right, odd
605*c0909341SAndroid Build Coastguard Worker    vadd.w        vr9,       vr11,    vr9    // 4-entry partial sum, pixels 0 2 4 6
606*c0909341SAndroid Build Coastguard Worker    vadd.w        vr10,      vr12,    vr10   // 4-entry partial sum, pixels 1 3 5 7
607*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr11,      vr10,    vr9    // 0 1 2 3
608*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr12,      vr10,    vr9    // 4 5 6 7
609*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr0,       vr4,     0      // down, pixels 0..3 sign-extended to words
610*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr1,       vr4             // down, pixels 4..7 sign-extended to words
611*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr11,    vr0    // 5-entry cross sum, pixels 0..3
612*c0909341SAndroid Build Coastguard Worker    vadd.w        vr1,       vr12,    vr1    // 5-entry cross sum, pixels 4..7
613*c0909341SAndroid Build Coastguard Worker    vslli.w       vr0,       vr0,     2      // cross * 4
614*c0909341SAndroid Build Coastguard Worker    vslli.w       vr1,       vr1,     2      // cross * 4
615*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr9,       vr5,     vr6    // up-left + down-left, even
616*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr10,      vr5,     vr6    // up-left + down-left, odd
617*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr11,      vr7,     vr8    // up-right + down-right, even
618*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr12,      vr7,     vr8    // up-right + down-right, odd
619*c0909341SAndroid Build Coastguard Worker    vadd.w        vr9,       vr11,    vr9    // diagonal sum, pixels 0 2 4 6
620*c0909341SAndroid Build Coastguard Worker    vadd.w        vr10,      vr12,    vr10   // diagonal sum, pixels 1 3 5 7
621*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr13,      vr10,    vr9    // diagonal sum, pixels 0 1 2 3
622*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr14,      vr10,    vr9    // diagonal sum, pixels 4 5 6 7
623*c0909341SAndroid Build Coastguard Worker    vslli.w       vr15,      vr13,    1      // diag * 2
624*c0909341SAndroid Build Coastguard Worker    vslli.w       vr16,      vr14,    1      // diag * 2
625*c0909341SAndroid Build Coastguard Worker    vadd.w        vr15,      vr13,    vr15   // diag * 3, pixels 0..3
626*c0909341SAndroid Build Coastguard Worker    vadd.w        vr16,      vr14,    vr16   // diag * 3, pixels 4..7
627*c0909341SAndroid Build Coastguard Worker    vadd.w        vr22,      vr0,     vr15   // a = cross * 4 + diag * 3, pixels 0..3
628*c0909341SAndroid Build Coastguard Worker    vadd.w        vr23,      vr1,     vr16   // a, pixels 4..7
629*c0909341SAndroid Build Coastguard Worker    vld           vr0,       t6,      0      // src: 16 bytes loaded, only the low 8 are used
630*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr0,       vr0,     0      // low 8 bytes zero-extended to halfwords
631*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr1,       vr0,     0      // src pixels 0..3 as words
632*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr2,       vr0             // src pixels 4..7 as words
633*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr20,      vr22,    vr1    // b + a * src, pixels 0..3
634*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr21,      vr23,    vr2    // b + a * src, pixels 4..7
635*c0909341SAndroid Build Coastguard Worker    vssrlrni.h.w  vr21,      vr20,    9      // rounding >> 9, saturating narrow: vr20 -> low half, vr21 -> high half
636*c0909341SAndroid Build Coastguard Worker    vst           vr21,      t8,      0      // store 8 dst entries
637*c0909341SAndroid Build Coastguard Worker    addi.d        t8,        t8,      16     // dst cursor += 8 halfwords
638*c0909341SAndroid Build Coastguard Worker
639*c0909341SAndroid Build Coastguard Worker    addi.d        t0,        t0,      32     // sumsq cursors += 8 words
640*c0909341SAndroid Build Coastguard Worker    addi.d        t1,        t1,      32
641*c0909341SAndroid Build Coastguard Worker    addi.d        t2,        t2,      32
642*c0909341SAndroid Build Coastguard Worker    addi.d        t3,        t3,      16     // sum cursors += 8 halfwords
643*c0909341SAndroid Build Coastguard Worker    addi.d        t4,        t4,      16
644*c0909341SAndroid Build Coastguard Worker    addi.d        t5,        t5,      16
645*c0909341SAndroid Build Coastguard Worker    addi.d        t6,        t6,      8      // src cursor += 8 bytes
646*c0909341SAndroid Build Coastguard Worker    addi.w        t7,        t7,      -8     // 8 pixels done
647*c0909341SAndroid Build Coastguard Worker    blt           zero,      t7,      .LBS3SGF_V_W   // loop while pixels remain (t7 > 0)
648*c0909341SAndroid Build Coastguard Worker
649*c0909341SAndroid Build Coastguard Worker    addi.w        a5,        a5,      -1     // one row done
650*c0909341SAndroid Build Coastguard Worker    addi.d        a0,        a0,      384*2  // dst: next row (384 halfword entries)
651*c0909341SAndroid Build Coastguard Worker    addi.d        a1,        a1,      REST_UNIT_STRIDE      // src: next row
652*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,      REST_UNIT_STRIDE<<1   // sum: next row
653*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,      REST_UNIT_STRIDE<<2   // sumsq: next row
654*c0909341SAndroid Build Coastguard Worker    bnez          a5,        .LBS3SGF_V_H    // repeat for all h rows
655*c0909341SAndroid Build Coastguard Workerendfunc
656*c0909341SAndroid Build Coastguard Worker
657*c0909341SAndroid Build Coastguard Workerfunction boxsum3_sgf_v_8bpc_lasx
658*c0909341SAndroid Build Coastguard Worker    addi.d        a1,        a1,      (3*REST_UNIT_STRIDE+3)   // src
659*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,      REST_UNIT_STRIDE<<2
660*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,      (REST_UNIT_STRIDE<<2)+12
661*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,      REST_UNIT_STRIDE<<2
662*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,      6
663*c0909341SAndroid Build Coastguard Worker.LBS3SGF_V_H_LASX:
664*c0909341SAndroid Build Coastguard Worker    // A int32_t *sumsq
665*c0909341SAndroid Build Coastguard Worker    addi.d        t0,        a2,      -(REST_UNIT_STRIDE<<2)   // -stride
666*c0909341SAndroid Build Coastguard Worker    addi.d        t1,        a2,      0    // sumsq
667*c0909341SAndroid Build Coastguard Worker    addi.d        t2,        a2,      REST_UNIT_STRIDE<<2      // +stride
668*c0909341SAndroid Build Coastguard Worker    addi.d        t6,        a1,      0
669*c0909341SAndroid Build Coastguard Worker    addi.w        t7,        a4,      0
670*c0909341SAndroid Build Coastguard Worker    addi.d        t8,        a0,      0
671*c0909341SAndroid Build Coastguard Worker    // B coef *sum
672*c0909341SAndroid Build Coastguard Worker    addi.d        t3,        a3,      -(REST_UNIT_STRIDE<<1)   // -stride
673*c0909341SAndroid Build Coastguard Worker    addi.d        t4,        a3,      0
674*c0909341SAndroid Build Coastguard Worker    addi.d        t5,        a3,      REST_UNIT_STRIDE<<1
675*c0909341SAndroid Build Coastguard Worker
676*c0909341SAndroid Build Coastguard Worker.LBS3SGF_V_W_LASX:
677*c0909341SAndroid Build Coastguard Worker    xvld           xr0,       t0,      0   // P[i - REST_UNIT_STRIDE]
678*c0909341SAndroid Build Coastguard Worker    xvld           xr1,       t0,      32
679*c0909341SAndroid Build Coastguard Worker    xvld           xr2,       t1,      -4  // P[i-1]  -1 0 1 2
680*c0909341SAndroid Build Coastguard Worker    xvld           xr3,       t1,      28           // 3 4 5 6
681*c0909341SAndroid Build Coastguard Worker    xvld           xr4,       t2,      0   // P[i + REST_UNIT_STRIDE]
682*c0909341SAndroid Build Coastguard Worker    xvld           xr5,       t2,      32
683*c0909341SAndroid Build Coastguard Worker    xvld           xr6,       t1,      0   // p[i]     0 1 2 3
684*c0909341SAndroid Build Coastguard Worker    xvld           xr7,       t1,      32           // 4 5 6 7
685*c0909341SAndroid Build Coastguard Worker    xvld           xr8,       t1,      4   // p[i+1]   1 2 3 4
686*c0909341SAndroid Build Coastguard Worker    xvld           xr9,       t1,      36           // 5 6 7 8
687*c0909341SAndroid Build Coastguard Worker
688*c0909341SAndroid Build Coastguard Worker    xvld           xr10,      t0,      -4  // P[i - 1 - REST_UNIT_STRIDE]
689*c0909341SAndroid Build Coastguard Worker    xvld           xr11,      t0,      28
690*c0909341SAndroid Build Coastguard Worker    xvld           xr12,      t2,      -4  // P[i - 1 + REST_UNIT_STRIDE]
691*c0909341SAndroid Build Coastguard Worker    xvld           xr13,      t2,      28
692*c0909341SAndroid Build Coastguard Worker    xvld           xr14,      t0,      4   // P[i + 1 - REST_UNIT_STRIDE]
693*c0909341SAndroid Build Coastguard Worker    xvld           xr15,      t0,      36
694*c0909341SAndroid Build Coastguard Worker    xvld           xr16,      t2,      4   // P[i + 1 + REST_UNIT_STRIDE]
695*c0909341SAndroid Build Coastguard Worker    xvld           xr17,      t2,      36
696*c0909341SAndroid Build Coastguard Worker
697*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr0,       xr2,     xr0
698*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr4,       xr6,     xr4
699*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr0,       xr0,     xr8
700*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr20,      xr0,     xr4
701*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr20,      xr20,    2      // 0 1 2 3
702*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr0,       xr1,     xr3
703*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr4,       xr5,     xr7
704*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr0,       xr0,     xr9
705*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr21,      xr0,     xr4
706*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr21,      xr21,    2      // 4 5 6 7
707*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr12,      xr10,    xr12
708*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr16,      xr14,    xr16
709*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr22,      xr12,    xr16
710*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr23,      xr22,    1
711*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr22,      xr23,    xr22
712*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr11,      xr11,    xr13
713*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr15,      xr15,    xr17
714*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr0,       xr11,    xr15
715*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr23,      xr0,     1
716*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr23,      xr23,    xr0
717*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr20,      xr20,    xr22   // b
718*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr21,      xr21,    xr23
719*c0909341SAndroid Build Coastguard Worker
720*c0909341SAndroid Build Coastguard Worker    // B coef *sum
721*c0909341SAndroid Build Coastguard Worker    xvld           xr0,       t3,      0   // P[i - REST_UNIT_STRIDE]
722*c0909341SAndroid Build Coastguard Worker    xvld           xr1,       t4,      -2  // p[i - 1]
723*c0909341SAndroid Build Coastguard Worker    xvld           xr2,       t4,      0   // p[i]
724*c0909341SAndroid Build Coastguard Worker    xvld           xr3,       t4,      2   // p[i + 1]
725*c0909341SAndroid Build Coastguard Worker    xvld           xr4,       t5,      0   // P[i + REST_UNIT_STRIDE]
726*c0909341SAndroid Build Coastguard Worker    xvld           xr5,       t3,      -2  // P[i - 1 - REST_UNIT_STRIDE]
727*c0909341SAndroid Build Coastguard Worker    xvld           xr6,       t5,      -2  // P[i - 1 + REST_UNIT_STRIDE]
728*c0909341SAndroid Build Coastguard Worker    xvld           xr7,       t3,      2   // P[i + 1 - REST_UNIT_STRIDE]
729*c0909341SAndroid Build Coastguard Worker    xvld           xr8,       t5,      2   // P[i + 1 + REST_UNIT_STRIDE]
730*c0909341SAndroid Build Coastguard Worker
731*c0909341SAndroid Build Coastguard Worker    xvaddwev.w.h   xr9,       xr0,     xr1
732*c0909341SAndroid Build Coastguard Worker    xvaddwod.w.h   xr10,      xr0,     xr1
733*c0909341SAndroid Build Coastguard Worker    xvaddwev.w.h   xr11,      xr2,     xr3
734*c0909341SAndroid Build Coastguard Worker    xvaddwod.w.h   xr12,      xr2,     xr3
735*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr9,       xr11,    xr9   // 0 2 4 6 8 10 12 14
736*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr10,      xr12,    xr10  // 1 3 5 7 9 11 13 15
737*c0909341SAndroid Build Coastguard Worker    xvilvl.w       xr11,      xr10,    xr9   // 0 1 2 3 8 9 10 11
738*c0909341SAndroid Build Coastguard Worker    xvilvh.w       xr12,      xr10,    xr9   // 4 5 6 7 12 13 14 15
739*c0909341SAndroid Build Coastguard Worker    xvsllwil.w.h   xr0,       xr4,     0     // 0 1 2 3 8 9 10 11
740*c0909341SAndroid Build Coastguard Worker    xvexth.w.h     xr1,       xr4            // 4 5 6 7 12 13 14 15
741*c0909341SAndroid Build Coastguard Worker
742*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr0,       xr11,    xr0
743*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr1,       xr12,    xr1
744*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr0,       xr0,     2
745*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr1,       xr1,     2
746*c0909341SAndroid Build Coastguard Worker
747*c0909341SAndroid Build Coastguard Worker    xvaddwev.w.h   xr9,       xr5,     xr6
748*c0909341SAndroid Build Coastguard Worker    xvaddwod.w.h   xr10,      xr5,     xr6
749*c0909341SAndroid Build Coastguard Worker    xvaddwev.w.h   xr11,      xr7,     xr8
750*c0909341SAndroid Build Coastguard Worker    xvaddwod.w.h   xr12,      xr7,     xr8
751*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr9,       xr11,    xr9
752*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr10,      xr12,    xr10
753*c0909341SAndroid Build Coastguard Worker    xvilvl.w       xr13,      xr10,    xr9   // 0 1 2 3 8 9 10 11
754*c0909341SAndroid Build Coastguard Worker    xvilvh.w       xr14,      xr10,    xr9   // 4 5 6 7 12 13 14 15
755*c0909341SAndroid Build Coastguard Worker
756*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr15,      xr13,    1
757*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr16,      xr14,    1
758*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr15,      xr13,    xr15   // a
759*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr16,      xr14,    xr16
760*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr22,      xr0,     xr15   // A B
761*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr23,      xr1,     xr16   // C D
762*c0909341SAndroid Build Coastguard Worker
763*c0909341SAndroid Build Coastguard Worker    vld            vr0,       t6,      0      // src
764*c0909341SAndroid Build Coastguard Worker    vilvh.d        vr2,       vr0,     vr0
765*c0909341SAndroid Build Coastguard Worker    vext2xv.wu.bu  xr1,       xr0
766*c0909341SAndroid Build Coastguard Worker    vext2xv.wu.bu  xr2,       xr2
767*c0909341SAndroid Build Coastguard Worker    xvor.v         xr15,      xr22,    xr22   // A B
768*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr22,      xr23,    0b00000010  // A C
769*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr23,      xr15,    0b00110001
770*c0909341SAndroid Build Coastguard Worker    xvmadd.w       xr20,      xr22,    xr1
771*c0909341SAndroid Build Coastguard Worker    xvmadd.w       xr21,      xr23,    xr2
772*c0909341SAndroid Build Coastguard Worker    xvssrlrni.h.w  xr21,      xr20,    9
773*c0909341SAndroid Build Coastguard Worker    xvpermi.d      xr22,      xr21,    0b11011000
774*c0909341SAndroid Build Coastguard Worker    xvst           xr22,      t8,      0
775*c0909341SAndroid Build Coastguard Worker    addi.d         t8,        t8,      32
776*c0909341SAndroid Build Coastguard Worker
777*c0909341SAndroid Build Coastguard Worker    addi.d        t0,        t0,      64
778*c0909341SAndroid Build Coastguard Worker    addi.d        t1,        t1,      64
779*c0909341SAndroid Build Coastguard Worker    addi.d        t2,        t2,      64
780*c0909341SAndroid Build Coastguard Worker    addi.d        t3,        t3,      32
781*c0909341SAndroid Build Coastguard Worker    addi.d        t4,        t4,      32
782*c0909341SAndroid Build Coastguard Worker    addi.d        t5,        t5,      32
783*c0909341SAndroid Build Coastguard Worker    addi.d        t6,        t6,      16
784*c0909341SAndroid Build Coastguard Worker    addi.w        t7,        t7,      -16
785*c0909341SAndroid Build Coastguard Worker    blt           zero,      t7,      .LBS3SGF_V_W_LASX
786*c0909341SAndroid Build Coastguard Worker
787*c0909341SAndroid Build Coastguard Worker    addi.w        a5,        a5,      -1
788*c0909341SAndroid Build Coastguard Worker    addi.d        a0,        a0,      384*2
789*c0909341SAndroid Build Coastguard Worker    addi.d        a1,        a1,      REST_UNIT_STRIDE
790*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,      REST_UNIT_STRIDE<<1
791*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,      REST_UNIT_STRIDE<<2
792*c0909341SAndroid Build Coastguard Worker    bnez          a5,        .LBS3SGF_V_H_LASX
793*c0909341SAndroid Build Coastguard Workerendfunc
794*c0909341SAndroid Build Coastguard Worker
// Row pitch, in int16_t elements, of the filter-output buffer read by
// sgr_3x3_finish_8bpc_lsx: that routine advances its dst pointer by
// (FILTER_OUT_STRIDE<<1) bytes after every row.
795*c0909341SAndroid Build Coastguard Worker#define FILTER_OUT_STRIDE (384)
796*c0909341SAndroid Build Coastguard Worker
797*c0909341SAndroid Build Coastguard Worker/*
798*c0909341SAndroid Build Coastguard Workersgr_3x3_finish_c(const pixel *p, const ptrdiff_t stride,
799*c0909341SAndroid Build Coastguard Worker                   const int16_t *dst, const int w1,
800*c0909341SAndroid Build Coastguard Worker                   const int w, const int h);
801*c0909341SAndroid Build Coastguard Worker*/
// sgr_3x3_finish_8bpc_lsx: blend an int16_t filter-output plane back into an
// 8-bit pixel plane, overwriting the pixels in place.
//
// Register arguments:
//   a0 = p       pixel rows (read, then overwritten)
//   a1 = stride  byte distance between pixel rows (added directly to a0)
//   a2 = dst     int16_t rows, (FILTER_OUT_STRIDE<<1) bytes apart
//   a3 = w1      weight, broadcast to every 32-bit lane of vr3
//   a4 = w       pixels per row
//   a5 = h       number of rows
//
// Per pixel:
//   u = p << 4
//   v = (u << 7) + w1 * (dst - u)
//   p = saturate_u8(round_shift_right(v, 11))
//
// Rows are handled 8 pixels per pass (.LSGR3X3_W). The w & 7 leftover pixels
// are computed as one more full vector and written one byte at a time
// (.LSGR3X3_ST). When w < 8 the whole routine runs in the remainder-only
// loop (.LSGR3X3_REM).
//
// NOTE(review): every vld fetches 16 bytes, so both p and dst are read past
// the last valid pixel of a row -- presumably the buffers are padded; confirm
// against the caller.
// NOTE(review): a5 and t4 are decremented before they are tested, so the code
// appears to assume w >= 1 and h >= 1 -- confirm against the caller.
// NOTE(review): endfunc is a macro defined outside this chunk; presumably it
// emits the return instruction that .LSGR3X3_END falls into.
802*c0909341SAndroid Build Coastguard Workerfunction sgr_3x3_finish_8bpc_lsx
803*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr3,     a3            // vr3 = w1 in every 32-bit lane
804*c0909341SAndroid Build Coastguard Worker    andi          t4,      a4,       0x7 // t4 = w & 7 (leftover pixels)
805*c0909341SAndroid Build Coastguard Worker    sub.w         t5,      a4,       t4  // t5 = w rounded down to a multiple of 8
806*c0909341SAndroid Build Coastguard Worker
807*c0909341SAndroid Build Coastguard Worker    beq           zero,    t5,       .LSGR3X3_REM  // w < 8: remainder-only rows
808*c0909341SAndroid Build Coastguard Worker
809*c0909341SAndroid Build Coastguard Worker.LSGR3X3_H:                              // row loop for w >= 8
810*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      a0,       0   // t0 = pixel cursor for this row
811*c0909341SAndroid Build Coastguard Worker    addi.d        t1,      a2,       0   // t1 = dst cursor for this row
812*c0909341SAndroid Build Coastguard Worker    addi.w        t2,      t5,       0   // t2 = pixels left for the 8-wide loop
813*c0909341SAndroid Build Coastguard Worker    andi          t4,      a4,       0x7 // recomputed: .LSGR3X3_ST counts t4 down to 0
814*c0909341SAndroid Build Coastguard Worker.LSGR3X3_W:                              // 8 pixels per pass
815*c0909341SAndroid Build Coastguard Worker    vld           vr0,     t0,       0   // 16 pixel bytes, only the low 8 are used
816*c0909341SAndroid Build Coastguard Worker    vld           vr1,     t1,       0   // 8 x int16 dst
817*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,     vr0,      4   // u = p << 4, 8 x u16
818*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr4,     vr2,      0   // u 0 - 3, zero-extended to 32 bits
819*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr5,     vr2           // u 4 - 7, zero-extended to 32 bits
820*c0909341SAndroid Build Coastguard Worker    vslli.w       vr6,     vr4,      7   // u << 7, lanes 0 - 3
821*c0909341SAndroid Build Coastguard Worker    vslli.w       vr7,     vr5,      7   // u << 7, lanes 4 - 7
822*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr8,     vr1,      0   // dst 0 - 3, sign-extended
823*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr9,     vr1           // dst 4 - 7, sign-extended
824*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,     vr8,      vr4 // dst - u
825*c0909341SAndroid Build Coastguard Worker    vsub.w        vr9,     vr9,      vr5 // dst - u
826*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr8,      vr3  // v 0 - 3 = (u << 7) + (dst - u) * w1
827*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr9,      vr3  // v 4 - 7
828*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr7,     vr6,      11   // rounding >> 11, saturate to u16: v 0 - 7
829*c0909341SAndroid Build Coastguard Worker    vssrlni.bu.h  vr7,     vr7,      0    // saturate to u8: pixels 0 - 7 in the low 8 bytes
830*c0909341SAndroid Build Coastguard Worker    vstelm.d      vr7,     t0,       0,    0  // write the 8 pixels back over p
831*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      t0,       8   // 8 pixels consumed
832*c0909341SAndroid Build Coastguard Worker    addi.d        t1,      t1,       16  // 8 int16 consumed
833*c0909341SAndroid Build Coastguard Worker    addi.d        t2,      t2,       -8
834*c0909341SAndroid Build Coastguard Worker    bne           zero,    t2,       .LSGR3X3_W
835*c0909341SAndroid Build Coastguard Worker
836*c0909341SAndroid Build Coastguard Worker    beq           t4,      zero,     .LSGR3X3_NOREM  // w is a multiple of 8: row done
837*c0909341SAndroid Build Coastguard Worker
// Row tail: same arithmetic as .LSGR3X3_W, but only t4 bytes get stored.
838*c0909341SAndroid Build Coastguard Worker    vld           vr0,     t0,       0
839*c0909341SAndroid Build Coastguard Worker    vld           vr1,     t1,       0
840*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,     vr0,      4   // u = p << 4, 8 x u16
841*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr4,     vr2,      0   // u 0 - 3
842*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr5,     vr2           // u 4 - 7
843*c0909341SAndroid Build Coastguard Worker    vslli.w       vr6,     vr4,      7   // u << 7
844*c0909341SAndroid Build Coastguard Worker    vslli.w       vr7,     vr5,      7   // u << 7
845*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr8,     vr1,      0   // dst 0 - 3
846*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr9,     vr1           // dst 4 - 7
847*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,     vr8,      vr4 // dst - u
848*c0909341SAndroid Build Coastguard Worker    vsub.w        vr9,     vr9,      vr5 // dst - u
849*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr8,      vr3  // v 0 - 3
850*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr9,      vr3  // v 4 - 7
851*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr7,     vr6,      11   // rounding >> 11, saturate to u16
852*c0909341SAndroid Build Coastguard Worker    vssrlni.bu.h  vr7,     vr7,      0    // saturate to u8
853*c0909341SAndroid Build Coastguard Worker
854*c0909341SAndroid Build Coastguard Worker.LSGR3X3_ST:                             // store t4 pixels, one byte per pass
855*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr7,     t0,       0,    0  // store byte lane 0
856*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      t0,       1
857*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr7,     vr7,      1   // shift the next pixel down into lane 0
858*c0909341SAndroid Build Coastguard Worker    addi.w        t4,      t4,       -1
859*c0909341SAndroid Build Coastguard Worker    bnez          t4,      .LSGR3X3_ST
860*c0909341SAndroid Build Coastguard Worker
861*c0909341SAndroid Build Coastguard Worker.LSGR3X3_NOREM:                          // advance to the next row
862*c0909341SAndroid Build Coastguard Worker    addi.w        a5,      a5,       -1
863*c0909341SAndroid Build Coastguard Worker    add.d         a0,      a0,       a1  // p += stride
864*c0909341SAndroid Build Coastguard Worker    addi.d        a2,      a2,       (FILTER_OUT_STRIDE<<1)  // dst += one int16 row
865*c0909341SAndroid Build Coastguard Worker    bnez          a5,      .LSGR3X3_H
866*c0909341SAndroid Build Coastguard Worker    b             .LSGR3X3_END
867*c0909341SAndroid Build Coastguard Worker
868*c0909341SAndroid Build Coastguard Worker.LSGR3X3_REM:                            // row loop for w < 8
869*c0909341SAndroid Build Coastguard Worker    andi          t4,      a4,       0x7 // recomputed: .LSGR3X3_REM_ST counts t4 down to 0
870*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      a0,       0   // t0 = pixel cursor for this row
871*c0909341SAndroid Build Coastguard Worker    vld           vr0,     t0,       0
872*c0909341SAndroid Build Coastguard Worker    vld           vr1,     a2,       0
873*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,     vr0,      4   // u = p << 4, 8 x u16
874*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr4,     vr2,      0   // u 0 - 3
875*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr5,     vr2           // u 4 - 7
876*c0909341SAndroid Build Coastguard Worker    vslli.w       vr6,     vr4,      7   // u << 7
877*c0909341SAndroid Build Coastguard Worker    vslli.w       vr7,     vr5,      7   // u << 7
878*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr8,     vr1,      0   // dst 0 - 3
879*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr9,     vr1           // dst 4 - 7
880*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,     vr8,      vr4 // dst - u
881*c0909341SAndroid Build Coastguard Worker    vsub.w        vr9,     vr9,      vr5 // dst - u
882*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr8,      vr3  // v 0 - 3
883*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr9,      vr3  // v 4 - 7
884*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr7,     vr6,      11   // rounding >> 11, saturate to u16
885*c0909341SAndroid Build Coastguard Worker    vssrlni.bu.h  vr7,     vr7,      0    // saturate to u8
886*c0909341SAndroid Build Coastguard Worker
887*c0909341SAndroid Build Coastguard Worker.LSGR3X3_REM_ST:                         // store t4 pixels, one byte per pass
888*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr7,     t0,       0,    0  // store byte lane 0
889*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      t0,       1
890*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr7,     vr7,      1   // shift the next pixel down into lane 0
891*c0909341SAndroid Build Coastguard Worker    addi.w        t4,      t4,       -1
892*c0909341SAndroid Build Coastguard Worker    bnez          t4,      .LSGR3X3_REM_ST
893*c0909341SAndroid Build Coastguard Worker    addi.w        a5,      a5,       -1
894*c0909341SAndroid Build Coastguard Worker    add.d         a0,      a0,       a1  // p += stride
895*c0909341SAndroid Build Coastguard Worker    addi.d        a2,      a2,       (FILTER_OUT_STRIDE<<1)  // dst += one int16 row
896*c0909341SAndroid Build Coastguard Worker    bnez          a5,      .LSGR3X3_REM
897*c0909341SAndroid Build Coastguard Worker
898*c0909341SAndroid Build Coastguard Worker.LSGR3X3_END:
899*c0909341SAndroid Build Coastguard Workerendfunc
900*c0909341SAndroid Build Coastguard Worker
901*c0909341SAndroid Build Coastguard Worker/*
902*c0909341SAndroid Build Coastguard Workervoid boxsum5(int32_t *sumsq, coef *sum,
903*c0909341SAndroid Build Coastguard Worker             const pixel *const src,
904*c0909341SAndroid Build Coastguard Worker             const int w, const int h)
905*c0909341SAndroid Build Coastguard Worker*/
// boxsum5_h_8bpc_lsx: for every column, add up five vertically adjacent
// pixels and their squares.
//
// Register arguments:
//   a0 = sumsq  32-bit outputs: a*a + b*b + c*c + d*d + e*e
//   a1 = sum    16-bit outputs: a + b + c + d + e
//   a2 = src    8-bit pixels; rows a..e are REST_UNIT_STRIDE bytes apart
//   a3 = w      columns to cover, handled 16 per pass
//   a4 = h      row count; the row loop runs h - 4 times
//
// Both output pointers are advanced by REST_UNIT_STRIDE elements before the
// first store, so the first output row is left untouched.
//
// NOTE(review): each pass writes 16 outputs, so columns up to the next
// multiple of 16 are written and read -- presumably the buffers are padded
// to allow this; confirm against the caller.
// NOTE(review): a4 is decremented before it is tested against zero, so the
// code appears to assume h > 4 -- confirm against the caller.
906*c0909341SAndroid Build Coastguard Workerfunction boxsum5_h_8bpc_lsx
907*c0909341SAndroid Build Coastguard Worker    addi.w        a4,      a4,        -4                  // rows to produce = h - 4
908*c0909341SAndroid Build Coastguard Worker    addi.d        a0,      a0,        REST_UNIT_STRIDE<<2 // skip REST_UNIT_STRIDE 32-bit outputs
909*c0909341SAndroid Build Coastguard Worker    addi.d        a1,      a1,        REST_UNIT_STRIDE<<1 // skip REST_UNIT_STRIDE 16-bit outputs
911*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_H_H:                                            // row loop
912*c0909341SAndroid Build Coastguard Worker    addi.w        t3,      a3,        0                   // t3 = columns left
913*c0909341SAndroid Build Coastguard Worker    addi.d        t2,      a2,        0                   // t2 = src cursor
914*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      a0,        0                   // t0 = sumsq cursor
915*c0909341SAndroid Build Coastguard Worker    addi.d        t1,      a1,        0                   // t1 = sum cursor
916*c0909341SAndroid Build Coastguard Worker
917*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_H_W:                                            // 16 columns per pass
918*c0909341SAndroid Build Coastguard Worker    vld           vr0,     t2,        0                   // a
919*c0909341SAndroid Build Coastguard Worker    vld           vr1,     t2,        REST_UNIT_STRIDE    // b
920*c0909341SAndroid Build Coastguard Worker    vld           vr2,     t2,        REST_UNIT_STRIDE<<1 // c
921*c0909341SAndroid Build Coastguard Worker    vld           vr3,     t2,        REST_UNIT_STRIDE*3  // d
922*c0909341SAndroid Build Coastguard Worker    vld           vr4,     t2,        REST_UNIT_STRIDE<<2 // e
923*c0909341SAndroid Build Coastguard Worker
924*c0909341SAndroid Build Coastguard Worker    vilvl.b       vr5,     vr1,       vr0  // a0 b0 a1 b1 ... a7 b7
925*c0909341SAndroid Build Coastguard Worker    vilvh.b       vr6,     vr1,       vr0  // a8 b8 ... a15 b15
926*c0909341SAndroid Build Coastguard Worker    vilvl.b       vr7,     vr3,       vr2  // c0 d0 ... c7 d7
927*c0909341SAndroid Build Coastguard Worker    vilvh.b       vr8,     vr3,       vr2  // c8 d8 ... c15 d15
928*c0909341SAndroid Build Coastguard Worker    //sum_v
929*c0909341SAndroid Build Coastguard Worker    vhaddw.hu.bu  vr9,     vr5,       vr5  // 0 1  2  3  4  5  6  7
930*c0909341SAndroid Build Coastguard Worker    vhaddw.hu.bu  vr10,    vr6,       vr6  // 8 9 10 11 12 13 14 15  a+b
931*c0909341SAndroid Build Coastguard Worker    vhaddw.hu.bu  vr11,    vr7,       vr7  // 0 .. 7   c+d
932*c0909341SAndroid Build Coastguard Worker    vhaddw.hu.bu  vr12,    vr8,       vr8  // 8 .. 15  c+d
933*c0909341SAndroid Build Coastguard Worker    vadd.h        vr9,     vr9,       vr11
934*c0909341SAndroid Build Coastguard Worker    vadd.h        vr10,    vr10,      vr12  // a + b + c + d
935*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr11,    vr4,       0     // e 0 .. 7 as u16 (reused for e*e below)
936*c0909341SAndroid Build Coastguard Worker    vexth.hu.bu   vr12,    vr4              // e 8 .. 15 as u16
937*c0909341SAndroid Build Coastguard Worker    vadd.h        vr9,     vr9,       vr11
938*c0909341SAndroid Build Coastguard Worker    vadd.h        vr10,    vr10,      vr12  // a + b + c + d + e
939*c0909341SAndroid Build Coastguard Worker    vst           vr9,     t1,        0
940*c0909341SAndroid Build Coastguard Worker    vst           vr10,    t1,        16
941*c0909341SAndroid Build Coastguard Worker    addi.d        t1,      t1,        32    // 16 x 16-bit sums written
942*c0909341SAndroid Build Coastguard Worker
943*c0909341SAndroid Build Coastguard Worker    // sumsq
944*c0909341SAndroid Build Coastguard Worker    vmulwev.h.bu  vr9,     vr5,       vr5  // a*a 0 1  2  3  4  5  6  7
945*c0909341SAndroid Build Coastguard Worker    vmulwev.h.bu  vr10,    vr6,       vr6  // a*a 8 9 10 11 12 13 14 15
946*c0909341SAndroid Build Coastguard Worker    vmulwod.h.bu  vr13,    vr5,       vr5  // b*b 0 1  2  3  4  5  6  7
947*c0909341SAndroid Build Coastguard Worker    vmulwod.h.bu  vr14,    vr6,       vr6  // b*b 8 9 10 11 12 13 14 15
948*c0909341SAndroid Build Coastguard Worker    vmulwev.h.bu  vr15,    vr7,       vr7  // c*c 0 1  2  3  4  5  6  7
949*c0909341SAndroid Build Coastguard Worker    vmulwev.h.bu  vr16,    vr8,       vr8  // c*c 8 9 10 11 12 13 14 15
950*c0909341SAndroid Build Coastguard Worker    vmulwod.h.bu  vr17,    vr7,       vr7  // d*d 0 1  2  3  4  5  6  7
951*c0909341SAndroid Build Coastguard Worker    vmulwod.h.bu  vr18,    vr8,       vr8  // d*d 8 9 10 11 12 13 14 15
952*c0909341SAndroid Build Coastguard Worker    vaddwev.w.hu  vr5,     vr9,       vr13  // 0 2 4 6
953*c0909341SAndroid Build Coastguard Worker    vaddwod.w.hu  vr6,     vr9,       vr13  // 1 3 5 7
954*c0909341SAndroid Build Coastguard Worker    vaddwev.w.hu  vr7,     vr10,      vr14  // 8 10 12 14
955*c0909341SAndroid Build Coastguard Worker    vaddwod.w.hu  vr8,     vr10,      vr14  // 9 11 13 15   a + b
956*c0909341SAndroid Build Coastguard Worker    vaddwev.w.hu  vr19,    vr15,      vr17  // 0 2 4 6
957*c0909341SAndroid Build Coastguard Worker    vaddwod.w.hu  vr20,    vr15,      vr17  // 1 3 5 7
958*c0909341SAndroid Build Coastguard Worker    vaddwev.w.hu  vr21,    vr16,      vr18  // 8 10 12 14
959*c0909341SAndroid Build Coastguard Worker    vaddwod.w.hu  vr22,    vr16,      vr18  // 9 11 13 15   c + d
960*c0909341SAndroid Build Coastguard Worker    vadd.w        vr5,     vr5,       vr19
961*c0909341SAndroid Build Coastguard Worker    vadd.w        vr6,     vr6,       vr20
962*c0909341SAndroid Build Coastguard Worker    vadd.w        vr7,     vr7,       vr21
963*c0909341SAndroid Build Coastguard Worker    vadd.w        vr8,     vr8,       vr22
964*c0909341SAndroid Build Coastguard Worker    vmaddwev.w.hu vr5,     vr11,      vr11  // += e*e, columns 0 2 4 6
965*c0909341SAndroid Build Coastguard Worker    vmaddwod.w.hu vr6,     vr11,      vr11  // += e*e, columns 1 3 5 7
966*c0909341SAndroid Build Coastguard Worker    vmaddwev.w.hu vr7,     vr12,      vr12  // += e*e, columns 8 10 12 14
967*c0909341SAndroid Build Coastguard Worker    vmaddwod.w.hu vr8,     vr12,      vr12  // += e*e, columns 9 11 13 15
968*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr19,    vr6,       vr5   // re-interleave even/odd: 0 1 2 3
969*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr20,    vr6,       vr5   // 4 5 6 7
970*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr21,    vr8,       vr7   // 8 9 10 11
971*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr22,    vr8,       vr7   // 12 13 14 15
972*c0909341SAndroid Build Coastguard Worker
973*c0909341SAndroid Build Coastguard Worker    vst           vr19,    t0,        0
974*c0909341SAndroid Build Coastguard Worker    vst           vr20,    t0,        16
975*c0909341SAndroid Build Coastguard Worker    vst           vr21,    t0,        32
976*c0909341SAndroid Build Coastguard Worker    vst           vr22,    t0,        48
977*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      t0,        64    // 16 x 32-bit sums written
978*c0909341SAndroid Build Coastguard Worker    addi.d        t2,      t2,        16    // 16 source pixels consumed
979*c0909341SAndroid Build Coastguard Worker    addi.w        t3,      t3,        -16
980*c0909341SAndroid Build Coastguard Worker    blt           zero,    t3,        .LBOXSUM5_H_W
981*c0909341SAndroid Build Coastguard Worker
982*c0909341SAndroid Build Coastguard Worker    addi.d        a0,      a0,        REST_UNIT_STRIDE<<2 // next sumsq row
983*c0909341SAndroid Build Coastguard Worker    addi.d        a1,      a1,        REST_UNIT_STRIDE<<1 // next sum row
984*c0909341SAndroid Build Coastguard Worker    addi.d        a2,      a2,        REST_UNIT_STRIDE    // next src row
985*c0909341SAndroid Build Coastguard Worker    addi.d        a4,      a4,        -1
986*c0909341SAndroid Build Coastguard Worker    bnez          a4,      .LBOXSUM5_H_H
987*c0909341SAndroid Build Coastguard Workerendfunc
988*c0909341SAndroid Build Coastguard Worker
989*c0909341SAndroid Build Coastguard Worker/*
990*c0909341SAndroid Build Coastguard Workervoid boxsum5_v(int32_t *sumsq, coef *sum,
991*c0909341SAndroid Build Coastguard Worker               const int w, const int h)
992*c0909341SAndroid Build Coastguard Worker*/
// boxsum5_v_8bpc_lsx: in-place 5-tap sliding sum along each row of both
// planes: out[x + 2] = in[x] + in[x+1] + in[x+2] + in[x+3] + in[x+4].
//
// Register arguments:
//   a0 = sumsq  32-bit plane, rows REST_UNIT_STRIDE<<2 bytes apart
//   a1 = sum    16-bit plane, rows REST_UNIT_STRIDE<<1 bytes apart
//   a2 = w      the column counter starts at w - 4, 8 outputs per pass
//   a3 = h      the row loop runs h - 4 times, starting one row in
//
// Because results are written back two elements to the right of the window
// start, each 8-output store overwrites the first two inputs (elements 0 and
// 1) of the next chunk. Those two values are therefore carried in registers:
//   - 16-bit plane: t5 holds the original elements 8 and 9 of the chunk
//   - 32-bit plane: vr8 holds the original elements 8..11 of the chunk
// and patched into the freshly loaded vectors at the top of .LBOXSUM5_V_W.
// The instruction order around t5 and vr8 must be preserved.
//
// NOTE(review): a3 is decremented before it is tested against zero, so the
// code appears to assume h > 4 -- confirm against the caller.
// NOTE(review): outputs are produced in groups of 8, so up to 7 columns past
// w - 4 are written -- presumably the buffers are padded; confirm.
993*c0909341SAndroid Build Coastguard Workerfunction boxsum5_v_8bpc_lsx
994*c0909341SAndroid Build Coastguard Worker    addi.d         a0,      a0,      (REST_UNIT_STRIDE<<2)  // skip the first sumsq row
995*c0909341SAndroid Build Coastguard Worker    addi.d         a1,      a1,      (REST_UNIT_STRIDE<<1)  // skip the first sum row
996*c0909341SAndroid Build Coastguard Worker    addi.w         a3,      a3,      -4   // rows to process = h - 4
997*c0909341SAndroid Build Coastguard Worker    addi.w         a2,      a2,      -4   // columns to produce = w - 4
998*c0909341SAndroid Build Coastguard Worker
999*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_V_H:                           // row loop
1001*c0909341SAndroid Build Coastguard Worker    addi.d         t0,      a0,      0   // t0 = sumsq read cursor
1002*c0909341SAndroid Build Coastguard Worker    addi.d         t1,      a1,      0   // t1 = sum read cursor
1003*c0909341SAndroid Build Coastguard Worker    addi.d         t2,      a0,      8   // t2 = sumsq write cursor (element 2)
1004*c0909341SAndroid Build Coastguard Worker    addi.d         t3,      a1,      4   // t3 = sum write cursor (element 2)
1005*c0909341SAndroid Build Coastguard Worker    addi.d         t4,      a2,      0   // t4 = columns left
1006*c0909341SAndroid Build Coastguard Worker
// First chunk of the row: nothing has been overwritten yet.
1007*c0909341SAndroid Build Coastguard Worker    vld            vr0,     t1,      0   // a 0 1 2 3 4 5 6 7
1008*c0909341SAndroid Build Coastguard Worker    vld            vr1,     t1,      2   // b 1 2 3 4 5 6 7 8
1009*c0909341SAndroid Build Coastguard Worker    vld            vr2,     t1,      4   // c 2
1010*c0909341SAndroid Build Coastguard Worker    vld            vr3,     t1,      6   // d 3
1011*c0909341SAndroid Build Coastguard Worker    vld            vr4,     t1,      8   // e 4 5 6 7 8 9 10 11
1012*c0909341SAndroid Build Coastguard Worker    vadd.h         vr5,     vr0,     vr1
1013*c0909341SAndroid Build Coastguard Worker    vadd.h         vr6,     vr2,     vr3
1014*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t5,      vr4,     2   // keep original elements 8 and 9 for the next chunk
1015*c0909341SAndroid Build Coastguard Worker    vadd.h         vr5,     vr5,     vr6
1016*c0909341SAndroid Build Coastguard Worker    vadd.h         vr5,     vr5,     vr4 // a + b + c + d + e
1017*c0909341SAndroid Build Coastguard Worker    vst            vr5,     t3,      0   // overwrites sum elements 2..9
1018*c0909341SAndroid Build Coastguard Worker
1019*c0909341SAndroid Build Coastguard Worker    vld            vr0,     t0,      0  // 0 1 2 3   a
1020*c0909341SAndroid Build Coastguard Worker    vld            vr1,     t0,      4  // 1 2 3 4   b
1021*c0909341SAndroid Build Coastguard Worker    vld            vr2,     t0,      8  // 2 3 4 5   c
1022*c0909341SAndroid Build Coastguard Worker    vld            vr3,     t0,      12 // 3 4 5 6   d
1023*c0909341SAndroid Build Coastguard Worker    vld            vr4,     t0,      16 // 4 5 6 7   e  a
1024*c0909341SAndroid Build Coastguard Worker    vld            vr5,     t0,      20 // 5 6 7 8      b
1025*c0909341SAndroid Build Coastguard Worker    vld            vr6,     t0,      24 // 6 7 8 9      c
1026*c0909341SAndroid Build Coastguard Worker    vld            vr7,     t0,      28 // 7 8 9 10     d
1027*c0909341SAndroid Build Coastguard Worker    vld            vr8,     t0,      32 // 8 9 10 11    e  (kept for the next chunk)
1028*c0909341SAndroid Build Coastguard Worker
1029*c0909341SAndroid Build Coastguard Worker    vadd.w         vr9,     vr0,     vr1
1030*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr2,     vr3
1031*c0909341SAndroid Build Coastguard Worker    vadd.w         vr9,     vr9,     vr10
1032*c0909341SAndroid Build Coastguard Worker    vadd.w         vr9,     vr9,     vr4   // window sums for outputs 2..5
1033*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr4,     vr5
1034*c0909341SAndroid Build Coastguard Worker    vadd.w         vr11,    vr6,     vr7
1035*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr10,    vr8
1036*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr10,    vr11  // window sums for outputs 6..9
1037*c0909341SAndroid Build Coastguard Worker    vst            vr9,     t2,      0     // overwrites sumsq elements 2..5
1038*c0909341SAndroid Build Coastguard Worker    vst            vr10,    t2,      16    // overwrites sumsq elements 6..9
1039*c0909341SAndroid Build Coastguard Worker
1040*c0909341SAndroid Build Coastguard Worker    addi.d         t3,      t3,      16    // 8 x 16-bit
1041*c0909341SAndroid Build Coastguard Worker    addi.d         t1,      t1,      16
1042*c0909341SAndroid Build Coastguard Worker    addi.d         t0,      t0,      32    // 8 x 32-bit
1043*c0909341SAndroid Build Coastguard Worker    addi.d         t2,      t2,      32
1044*c0909341SAndroid Build Coastguard Worker    addi.w         t4,      t4,      -8
1045*c0909341SAndroid Build Coastguard Worker    ble            t4,      zero,    .LBOXSUM5_V_H1  // row fits in one chunk
1046*c0909341SAndroid Build Coastguard Worker
// Remaining chunks. Lane comments give element indices relative to the
// current read cursor for the 16-bit plane, and relative to the row start on
// the first pass of this loop for the 32-bit plane.
1047*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_V_W:
1048*c0909341SAndroid Build Coastguard Worker    vld            vr0,     t1,      0   // a 0 1 2 3 4 5 6 7  (0 and 1 already overwritten)
1049*c0909341SAndroid Build Coastguard Worker    vld            vr1,     t1,      2   // b 1 2 3 4 5 6 7 8  (1 already overwritten)
1050*c0909341SAndroid Build Coastguard Worker    vld            vr2,     t1,      4   // c 2
1051*c0909341SAndroid Build Coastguard Worker    vld            vr3,     t1,      6   // d 3
1052*c0909341SAndroid Build Coastguard Worker    vld            vr4,     t1,      8   // e 4 5 6 7 8 9 10 11
1053*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr0,     t5,      0   // restore original elements 0 and 1 of a
1054*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t5,      vr4,     2   // keep original elements 8 and 9 for the next pass
1055*c0909341SAndroid Build Coastguard Worker    vextrins.h     vr1,     vr0,     0x01 // b lane 0 = restored element 1
1056*c0909341SAndroid Build Coastguard Worker    vadd.h         vr5,     vr0,     vr1
1057*c0909341SAndroid Build Coastguard Worker    vadd.h         vr6,     vr2,     vr3
1058*c0909341SAndroid Build Coastguard Worker    vadd.h         vr5,     vr5,     vr6
1059*c0909341SAndroid Build Coastguard Worker    vadd.h         vr5,     vr5,     vr4 // a + b + c + d + e
1060*c0909341SAndroid Build Coastguard Worker    vst            vr5,     t3,      0
1061*c0909341SAndroid Build Coastguard Worker
1062*c0909341SAndroid Build Coastguard Worker    vaddi.hu       vr0,     vr8,     0  //  8  9 10 11  a  (register copy of the saved vr8)
1063*c0909341SAndroid Build Coastguard Worker    vld            vr1,     t0,      4  //  9 10 11 12  b  (lane 0 patched below)
1064*c0909341SAndroid Build Coastguard Worker    vld            vr2,     t0,      8  // 10 11 12 13  c
1065*c0909341SAndroid Build Coastguard Worker    vld            vr3,     t0,      12 // 11 12 13 14  d
1066*c0909341SAndroid Build Coastguard Worker    vld            vr4,     t0,      16 // 12 13 14 15  e  a
1067*c0909341SAndroid Build Coastguard Worker    vld            vr5,     t0,      20 // 13 14 15 16     b
1068*c0909341SAndroid Build Coastguard Worker    vld            vr6,     t0,      24 // 14 15 16 17     c
1069*c0909341SAndroid Build Coastguard Worker    vld            vr7,     t0,      28 // 15 16 17 18     d
1070*c0909341SAndroid Build Coastguard Worker    vld            vr8,     t0,      32 // 16 17 18 19     e  (kept for the next pass)
1071*c0909341SAndroid Build Coastguard Worker    vextrins.w     vr1,     vr0,     0x01 // b lane 0 = original element 9 taken from a
1072*c0909341SAndroid Build Coastguard Worker    vadd.w         vr9,     vr0,     vr1
1073*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr2,     vr3
1074*c0909341SAndroid Build Coastguard Worker    vadd.w         vr9,     vr9,     vr10
1075*c0909341SAndroid Build Coastguard Worker    vadd.w         vr9,     vr9,     vr4   // first four window sums
1076*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr4,     vr5
1077*c0909341SAndroid Build Coastguard Worker    vadd.w         vr11,    vr6,     vr7
1078*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr10,    vr8
1079*c0909341SAndroid Build Coastguard Worker    vadd.w         vr10,    vr10,    vr11  // next four window sums
1080*c0909341SAndroid Build Coastguard Worker    vst            vr9,     t2,      0
1081*c0909341SAndroid Build Coastguard Worker    vst            vr10,    t2,      16
1082*c0909341SAndroid Build Coastguard Worker
1083*c0909341SAndroid Build Coastguard Worker    addi.d         t3,      t3,      16    // 8 x 16-bit
1084*c0909341SAndroid Build Coastguard Worker    addi.d         t1,      t1,      16
1085*c0909341SAndroid Build Coastguard Worker    addi.d         t0,      t0,      32    // 8 x 32-bit
1086*c0909341SAndroid Build Coastguard Worker    addi.d         t2,      t2,      32
1087*c0909341SAndroid Build Coastguard Worker    addi.w         t4,      t4,      -8
1088*c0909341SAndroid Build Coastguard Worker    blt            zero,    t4,      .LBOXSUM5_V_W
1089*c0909341SAndroid Build Coastguard Worker
1090*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_V_H1:                            // advance to the next row
1091*c0909341SAndroid Build Coastguard Worker    addi.d         a1,       a1,      REST_UNIT_STRIDE<<1
1092*c0909341SAndroid Build Coastguard Worker    addi.d         a0,       a0,      REST_UNIT_STRIDE<<2
1093*c0909341SAndroid Build Coastguard Worker    addi.w         a3,       a3,      -1
1094*c0909341SAndroid Build Coastguard Worker    bnez           a3,       .LBOXSUM5_V_H
1095*c0909341SAndroid Build Coastguard Workerendfunc
1096*c0909341SAndroid Build Coastguard Worker
1097*c0909341SAndroid Build Coastguard Worker/*
1098*c0909341SAndroid Build Coastguard Workerselfguided_filter(int32_t *sumsq, coef *sum,
1099*c0909341SAndroid Build Coastguard Worker                  const int w, const int h,
1100*c0909341SAndroid Build Coastguard Worker                  const unsigned s)
1101*c0909341SAndroid Build Coastguard Worker*/
1102*c0909341SAndroid Build Coastguard Workerfunction boxsum5_sgf_h_8bpc_lsx
1103*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,        REST_UNIT_STRIDE<<2
1104*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,        12   // AA
1105*c0909341SAndroid Build Coastguard Worker    addi.d        a1,       a1,        REST_UNIT_STRIDE<<1
1106*c0909341SAndroid Build Coastguard Worker    addi.d        a1,       a1,        6    // BB
1107*c0909341SAndroid Build Coastguard Worker    la.local      t8,       dav1d_sgr_x_by_x
1108*c0909341SAndroid Build Coastguard Worker    li.w          t6,       164
1109*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr20,     t6
1110*c0909341SAndroid Build Coastguard Worker    li.w          t6,       255
1111*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr22,     t6
1112*c0909341SAndroid Build Coastguard Worker    vaddi.wu      vr21,     vr22,      1  // 256
1113*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr6,      a4
1114*c0909341SAndroid Build Coastguard Worker    vldi          vr19,     0x819
1115*c0909341SAndroid Build Coastguard Worker    addi.w        a2,       a2,        2  // w + 2
1116*c0909341SAndroid Build Coastguard Worker    addi.w        a3,       a3,        2  // h + 2
1117*c0909341SAndroid Build Coastguard Worker
1118*c0909341SAndroid Build Coastguard Worker.LBS5SGF_H_H:
1119*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       a2,        0
1120*c0909341SAndroid Build Coastguard Worker    addi.d        t0,       a0,        -4
1121*c0909341SAndroid Build Coastguard Worker    addi.d        t1,       a1,        -2
1122*c0909341SAndroid Build Coastguard Worker
1123*c0909341SAndroid Build Coastguard Worker.LBS5SGF_H_W:
1124*c0909341SAndroid Build Coastguard Worker    vld           vr0,      t0,        0   // AA[i]
1125*c0909341SAndroid Build Coastguard Worker    vld           vr1,      t0,        16
1126*c0909341SAndroid Build Coastguard Worker    vld           vr2,      t1,        0   // BB[i]
1127*c0909341SAndroid Build Coastguard Worker
1128*c0909341SAndroid Build Coastguard Worker    vmul.w        vr4,      vr0,       vr19 // a * n
1129*c0909341SAndroid Build Coastguard Worker    vmul.w        vr5,      vr1,       vr19 // a * n
1130*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr9,      vr2,       0
1131*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr10,     vr2
1132*c0909341SAndroid Build Coastguard Worker    vmsub.w       vr4,      vr9,       vr9   // p
1133*c0909341SAndroid Build Coastguard Worker    vmsub.w       vr5,      vr10,      vr10   // p
1134*c0909341SAndroid Build Coastguard Worker    vmaxi.w       vr4,      vr4,       0
1135*c0909341SAndroid Build Coastguard Worker    vmaxi.w       vr5,      vr5,       0    // p
1136*c0909341SAndroid Build Coastguard Worker    vmul.w        vr4,      vr4,       vr6  // p * s
1137*c0909341SAndroid Build Coastguard Worker    vmul.w        vr5,      vr5,       vr6  // p * s
1138*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr4,      vr4,       20
1139*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr5,      vr5,       20   // z
1140*c0909341SAndroid Build Coastguard Worker    vmin.w        vr4,      vr4,       vr22
1141*c0909341SAndroid Build Coastguard Worker    vmin.w        vr5,      vr5,       vr22
1142*c0909341SAndroid Build Coastguard Worker
1143*c0909341SAndroid Build Coastguard Worker    // load table data
1144*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       0
1145*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1146*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        0
1147*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       1
1148*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1149*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        1
1150*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       2
1151*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1152*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        2
1153*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr4,       3
1154*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1155*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr7,      t7,        3
1156*c0909341SAndroid Build Coastguard Worker
1157*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       0
1158*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1159*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        0
1160*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       1
1161*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1162*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        1
1163*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       2
1164*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1165*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        2
1166*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w  t6,       vr5,       3
1167*c0909341SAndroid Build Coastguard Worker    ldx.bu        t7,       t8,        t6
1168*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w   vr8,      t7,        3     // x
1169*c0909341SAndroid Build Coastguard Worker
1170*c0909341SAndroid Build Coastguard Worker    vmul.w        vr9,      vr7,       vr9   // x * BB[i]
1171*c0909341SAndroid Build Coastguard Worker    vmul.w        vr10,     vr8,       vr10
1172*c0909341SAndroid Build Coastguard Worker    vmul.w        vr9,      vr9,       vr20  // x * BB[i] * sgr_one_by_x
1173*c0909341SAndroid Build Coastguard Worker    vmul.w        vr10,     vr10,      vr20
1174*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr9,      vr9,       12
1175*c0909341SAndroid Build Coastguard Worker    vsrlri.w      vr10,     vr10,      12
1176*c0909341SAndroid Build Coastguard Worker    vsub.w        vr7,      vr21,      vr7
1177*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,      vr21,      vr8
1178*c0909341SAndroid Build Coastguard Worker    vpickev.h     vr8,      vr8,       vr7
1179*c0909341SAndroid Build Coastguard Worker    vst           vr9,      t0,        0
1180*c0909341SAndroid Build Coastguard Worker    vst           vr10,     t0,        16
1181*c0909341SAndroid Build Coastguard Worker    vst           vr8,      t1,        0
1182*c0909341SAndroid Build Coastguard Worker    addi.d        t0,       t0,        32
1183*c0909341SAndroid Build Coastguard Worker    addi.d        t1,       t1,        16
1184*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       t2,        -8
1185*c0909341SAndroid Build Coastguard Worker    blt           zero,     t2,        .LBS5SGF_H_W
1186*c0909341SAndroid Build Coastguard Worker
1187*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,        REST_UNIT_STRIDE<<2
1188*c0909341SAndroid Build Coastguard Worker    addi.d        a0,       a0,        REST_UNIT_STRIDE<<2
1189*c0909341SAndroid Build Coastguard Worker    addi.d        a1,       a1,        REST_UNIT_STRIDE<<2
1190*c0909341SAndroid Build Coastguard Worker    addi.w        a3,       a3,        -2
1191*c0909341SAndroid Build Coastguard Worker    blt           zero,     a3,        .LBS5SGF_H_H
1192*c0909341SAndroid Build Coastguard Workerendfunc
1193*c0909341SAndroid Build Coastguard Worker
1194*c0909341SAndroid Build Coastguard Worker/*
1195*c0909341SAndroid Build Coastguard Workerselfguided_filter(coef *dst, pixel *src,
1196*c0909341SAndroid Build Coastguard Worker                  int32_t *sumsq, coef *sum,
1197*c0909341SAndroid Build Coastguard Worker                  const int w, const int h)
1198*c0909341SAndroid Build Coastguard Worker*/
1199*c0909341SAndroid Build Coastguard Workerfunction boxsum5_sgf_v_8bpc_lsx
    // Combines the 16-bit plane in a3, the 32-bit plane in a2 and the 8-bit
    // source pixels in a1 into 16-bit coefficients written to a0, eight
    // pixels per vector step.
    //
    // Register use, following the prototype comment above this function:
    //   a0 = dst, a1 = src, a2 = sumsq (labelled A below),
    //   a3 = sum (labelled B below), a4 = w, a5 = h.
    // REST_UNIT_STRIDE is presumably the row pitch, in elements, of the
    // src / sumsq / sum planes (TODO confirm against its definition); a dst
    // row is 384 coefficients (384*2 bytes).
    //
    // Rows are produced in pairs by .LBS5SGF_V_H:
    //   first row : a = 6*(B[up] + B[down]) + 5*(four diagonal B neighbours)
    //               b = same weights applied to the A plane
    //               dst = sat16((b + a*src + (1 << 8)) >> 9)
    //   second row: a = 6*B[centre] + 5*(B[left] + B[right])
    //               b = same weights applied to the A plane
    //               dst = sat16((b + a*src + (1 << 7)) >> 8)
    // When a5 ends at exactly zero after the pairs (odd h), .LBS5SGF_V_W1
    // produces one more row of the first kind.
1200*c0909341SAndroid Build Coastguard Worker    addi.d        a1,        a1,       3*REST_UNIT_STRIDE+3       // src: advance by 3*STRIDE+3 bytes
1201*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,       (2*REST_UNIT_STRIDE+3)<<1  // A: advance by 2*STRIDE+3 32-bit entries,
1202*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,       (2*REST_UNIT_STRIDE+3)<<1  // done as two adds (presumably to fit the addi.d immediate range)
1203*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,       (2*REST_UNIT_STRIDE+3)<<1  // B: advance by 2*STRIDE+3 16-bit entries
1204*c0909341SAndroid Build Coastguard Worker    addi.w        a5,        a5,       -1       // h - 1: lets the row-pair loop tell odd h (ends at 0) from even h (ends below 0)
1205*c0909341SAndroid Build Coastguard Worker    vldi          vr10,      0x806              // weight 6 in every 32-bit lane
1206*c0909341SAndroid Build Coastguard Worker    vldi          vr11,      0x805              // weight 5 in every 32-bit lane
1207*c0909341SAndroid Build Coastguard Worker    vldi          vr22,      0x406              // weight 6 in every 16-bit lane (for the widening multiplies)
1208*c0909341SAndroid Build Coastguard Worker
1209*c0909341SAndroid Build Coastguard Worker.LBS5SGF_V_H:
    // Per row pair: t0..t3 walk the first row, t6..t8 walk the second row,
    // t4 counts the remaining width.
1210*c0909341SAndroid Build Coastguard Worker    addi.d        t0,        a0,       0        // dst cursor
1211*c0909341SAndroid Build Coastguard Worker    addi.d        t1,        a1,       0        // src cursor, first row
1212*c0909341SAndroid Build Coastguard Worker    addi.d        t2,        a2,       0        // A cursor, first row
1213*c0909341SAndroid Build Coastguard Worker    addi.d        t3,        a3,       0        // B cursor, first row
1214*c0909341SAndroid Build Coastguard Worker    addi.w        t4,        a4,       0        // remaining width
1215*c0909341SAndroid Build Coastguard Worker
1216*c0909341SAndroid Build Coastguard Worker    addi.d        t5,        a0,       384*2    // NOTE(review): t5 is not read again in this function; the second row is stored via t0 + 384*2
1217*c0909341SAndroid Build Coastguard Worker    addi.d        t6,        a1,       REST_UNIT_STRIDE      // src cursor, second row
1218*c0909341SAndroid Build Coastguard Worker    addi.d        t7,        a2,       REST_UNIT_STRIDE<<2   // A cursor, second row
1219*c0909341SAndroid Build Coastguard Worker    addi.d        t8,        a3,       REST_UNIT_STRIDE<<1   // B cursor, second row
1220*c0909341SAndroid Build Coastguard Worker.LBS5SGF_V_W:
1221*c0909341SAndroid Build Coastguard Worker    // a (first row): weighted sum of the 16-bit B neighbours
1222*c0909341SAndroid Build Coastguard Worker    vld           vr0,       t3,       -REST_UNIT_STRIDE*2        // up
1223*c0909341SAndroid Build Coastguard Worker    vld           vr1,       t3,       REST_UNIT_STRIDE*2         // down
1224*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t3,       (-REST_UNIT_STRIDE-1)*2    // up-left
1225*c0909341SAndroid Build Coastguard Worker    vld           vr3,       t3,       (REST_UNIT_STRIDE-1)*2     // down-left
1226*c0909341SAndroid Build Coastguard Worker    vld           vr4,       t3,       (1-REST_UNIT_STRIDE)*2     // up-right
1227*c0909341SAndroid Build Coastguard Worker    vld           vr5,       t3,       (1+REST_UNIT_STRIDE)*2     // down-right
1228*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr6,       vr0,      vr1      // up + down, even elements widened to 32 bits
1229*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr7,       vr0,      vr1      // up + down, odd elements widened to 32 bits
1230*c0909341SAndroid Build Coastguard Worker    vmul.w        vr6,       vr6,      vr10     // * 6
1231*c0909341SAndroid Build Coastguard Worker    vmul.w        vr7,       vr7,      vr10
1232*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr8,       vr2,      vr3      // left diagonals, even / odd
1233*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr9,       vr2,      vr3
1234*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr12,      vr4,      vr5      // right diagonals, even / odd
1235*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr13,      vr4,      vr5
1236*c0909341SAndroid Build Coastguard Worker    vadd.w        vr8,       vr8,      vr12     // sum of the four diagonals
1237*c0909341SAndroid Build Coastguard Worker    vadd.w        vr9,       vr9,      vr13
1238*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,       vr8,      vr11     // += diagonals * 5
1239*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,       vr9,      vr11
1240*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr18,      vr7,      vr6      // re-interleave even/odd: a for elements 0 1 2 3
1241*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr19,      vr7,      vr6      // a for elements 4 5 6 7
1242*c0909341SAndroid Build Coastguard Worker    // b (first row): same weights on the 32-bit A plane, two vectors per 8 elements
1243*c0909341SAndroid Build Coastguard Worker    vld           vr0,       t2,       -REST_UNIT_STRIDE*4        // up
1244*c0909341SAndroid Build Coastguard Worker    vld           vr1,       t2,       -REST_UNIT_STRIDE*4+16
1245*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t2,       REST_UNIT_STRIDE*4         // down
1246*c0909341SAndroid Build Coastguard Worker    vld           vr3,       t2,       REST_UNIT_STRIDE*4+16
1247*c0909341SAndroid Build Coastguard Worker    vld           vr4,       t2,       (-REST_UNIT_STRIDE-1)*4    // up-left
1248*c0909341SAndroid Build Coastguard Worker    vld           vr5,       t2,       (-REST_UNIT_STRIDE-1)*4+16
1249*c0909341SAndroid Build Coastguard Worker    vld           vr8,       t2,       (REST_UNIT_STRIDE-1)*4     // down-left
1250*c0909341SAndroid Build Coastguard Worker    vld           vr9,       t2,       (REST_UNIT_STRIDE-1)*4+16
1251*c0909341SAndroid Build Coastguard Worker    vld           vr12,      t2,       (1-REST_UNIT_STRIDE)*4     // up-right
1252*c0909341SAndroid Build Coastguard Worker    vld           vr13,      t2,       (1-REST_UNIT_STRIDE)*4+16
1253*c0909341SAndroid Build Coastguard Worker    vld           vr14,      t2,       (1+REST_UNIT_STRIDE)*4     // down-right
1254*c0909341SAndroid Build Coastguard Worker    vld           vr15,      t2,       (1+REST_UNIT_STRIDE)*4+16
1255*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr0,      vr2  // up + down, elements 0 1 2 3
1256*c0909341SAndroid Build Coastguard Worker    vadd.w        vr1,       vr1,      vr3  // up + down, elements 4 5 6 7
1257*c0909341SAndroid Build Coastguard Worker    vmul.w        vr20,      vr0,      vr10 // * 6
1258*c0909341SAndroid Build Coastguard Worker    vmul.w        vr21,      vr1,      vr10
1259*c0909341SAndroid Build Coastguard Worker    vadd.w        vr4,       vr4,      vr8  // left diagonals, elements 0 1 2 3
1260*c0909341SAndroid Build Coastguard Worker    vadd.w        vr5,       vr5,      vr9  // left diagonals, elements 4 5 6 7
1261*c0909341SAndroid Build Coastguard Worker    vadd.w        vr12,      vr12,     vr14 // right diagonals
1262*c0909341SAndroid Build Coastguard Worker    vadd.w        vr13,      vr13,     vr15
1263*c0909341SAndroid Build Coastguard Worker    vadd.w        vr12,      vr12,     vr4  // sum of the four diagonals
1264*c0909341SAndroid Build Coastguard Worker    vadd.w        vr13,      vr13,     vr5
1265*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr20,      vr12,     vr11 // b += diagonals * 5
1266*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr21,      vr13,     vr11
1267*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t1,       0    // source pixels (16 bytes loaded, low 8 used)
1268*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,       vr2,      0    // zero-extend 8 bytes to 16 bits
1269*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr3,       vr2,      0    // src 0 1 2 3 as 32-bit
1270*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr4,       vr2            // src 4 5 6 7 as 32-bit
1271*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr20,      vr18,     vr3  // b + a * src
1272*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr21,      vr19,     vr4
1273*c0909341SAndroid Build Coastguard Worker    vssrlrni.h.w  vr21,      vr20,     9    // rounding shift right by 9, saturate and narrow to 16 bits (vr20 -> low half, vr21 -> high half)
1274*c0909341SAndroid Build Coastguard Worker    vst           vr21,      t0,       0    // 8 coefficients of the first row
1275*c0909341SAndroid Build Coastguard Worker
1276*c0909341SAndroid Build Coastguard Worker    addi.d        t1,        t1,       8    // 8 pixels
1277*c0909341SAndroid Build Coastguard Worker    addi.d        t2,        t2,       32   // 8 32-bit entries
1278*c0909341SAndroid Build Coastguard Worker    addi.d        t3,        t3,       16   // 8 16-bit entries
1279*c0909341SAndroid Build Coastguard Worker
1280*c0909341SAndroid Build Coastguard Worker    // a (second row): horizontal neighbours only
1281*c0909341SAndroid Build Coastguard Worker    vld           vr0,       t8,       0    // centre
1282*c0909341SAndroid Build Coastguard Worker    vld           vr1,       t8,       -2   // left
1283*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t8,       2    // right
1284*c0909341SAndroid Build Coastguard Worker    vmulwev.w.h   vr3,       vr0,      vr22 // centre * 6, even elements widened to 32 bits
1285*c0909341SAndroid Build Coastguard Worker    vmulwod.w.h   vr4,       vr0,      vr22 // centre * 6, odd elements
1286*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr5,       vr1,      vr2  // left + right, even / odd
1287*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr6,       vr1,      vr2
1288*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr3,       vr5,      vr11 // += (left + right) * 5
1289*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr4,       vr6,      vr11
1290*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr19,      vr4,      vr3  // a for elements 0 1 2 3
1291*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr20,      vr4,      vr3  // a for elements 4 5 6 7
1292*c0909341SAndroid Build Coastguard Worker    // b (second row)
1293*c0909341SAndroid Build Coastguard Worker    vld           vr0,       t7,       0    // centre, elements 0 1 2 3
1294*c0909341SAndroid Build Coastguard Worker    vld           vr1,       t7,       -4   // left
1295*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t7,       4    // right
1296*c0909341SAndroid Build Coastguard Worker    vld           vr5,       t7,       16   // centre, elements 4 5 6 7
1297*c0909341SAndroid Build Coastguard Worker    vld           vr6,       t7,       12   // left
1298*c0909341SAndroid Build Coastguard Worker    vld           vr7,       t7,       20   // right
1299*c0909341SAndroid Build Coastguard Worker    vmul.w        vr8,       vr0,      vr10 // centre * 6
1300*c0909341SAndroid Build Coastguard Worker    vmul.w        vr9,       vr5,      vr10
1301*c0909341SAndroid Build Coastguard Worker    vadd.w        vr12,      vr1,      vr2  // left + right
1302*c0909341SAndroid Build Coastguard Worker    vadd.w        vr13,      vr6,      vr7
1303*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr8,       vr12,     vr11 // += (left + right) * 5
1304*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr9,       vr13,     vr11
1305*c0909341SAndroid Build Coastguard Worker    vld           vr2,       t6,       0    // source pixels of the second row
1306*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,       vr2,      0    // zero-extend 8 bytes to 16 bits
1307*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr3,       vr2,      0    // src 0 1 2 3 as 32-bit
1308*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr4,       vr2            // src 4 5 6 7 as 32-bit
1309*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr8,       vr19,     vr3  // b + a * src
1310*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr9,       vr20,     vr4
1311*c0909341SAndroid Build Coastguard Worker    vssrlrni.h.w  vr9,       vr8,      8    // rounding shift right by 8, saturate and narrow to 16 bits
1312*c0909341SAndroid Build Coastguard Worker    vst           vr9,       t0,       384*2 // second row lives one dst row (384 coefficients) further on
1313*c0909341SAndroid Build Coastguard Worker
1314*c0909341SAndroid Build Coastguard Worker    addi.d        t0,        t0,       16   // 8 coefficients
1315*c0909341SAndroid Build Coastguard Worker    addi.d        t8,        t8,       16
1316*c0909341SAndroid Build Coastguard Worker    addi.d        t7,        t7,       32
1317*c0909341SAndroid Build Coastguard Worker    addi.d        t6,        t6,       8
1318*c0909341SAndroid Build Coastguard Worker    addi.w        t4,        t4,       -8
1319*c0909341SAndroid Build Coastguard Worker    blt           zero,      t4,       .LBS5SGF_V_W   // loop while columns remain
1320*c0909341SAndroid Build Coastguard Worker
1321*c0909341SAndroid Build Coastguard Worker    addi.w        a5,        a5,       -2                   // two rows done
1322*c0909341SAndroid Build Coastguard Worker    addi.d        a0,        a0,       384*4                // dst: two rows of 384 16-bit coefficients
1323*c0909341SAndroid Build Coastguard Worker    addi.d        a1,        a1,       REST_UNIT_STRIDE<<1  // src: two rows of bytes
1324*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,       REST_UNIT_STRIDE<<2  // A: two rows of 32-bit entries,
1325*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,       REST_UNIT_STRIDE<<2  // again done as two adds
1326*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,       REST_UNIT_STRIDE<<2  // B: two rows of 16-bit entries
1327*c0909341SAndroid Build Coastguard Worker    blt           zero,      a5,       .LBS5SGF_V_H         // more row pairs to go
1328*c0909341SAndroid Build Coastguard Worker    bnez          a5,        .LBS5SGF_END                   // a5 below zero: no leftover row; a5 == 0 falls through to the tail
1329*c0909341SAndroid Build Coastguard Worker.LBS5SGF_V_W1:
    // Tail: one final row of the first kind (vertical + diagonal neighbours,
    // shift 9). Works directly on a0..a4, which are not needed afterwards.
1330*c0909341SAndroid Build Coastguard Worker    // a
1331*c0909341SAndroid Build Coastguard Worker    vld           vr0,       a3,       -REST_UNIT_STRIDE*2        // up
1332*c0909341SAndroid Build Coastguard Worker    vld           vr1,       a3,       REST_UNIT_STRIDE*2         // down
1333*c0909341SAndroid Build Coastguard Worker    vld           vr2,       a3,       (-REST_UNIT_STRIDE-1)*2    // up-left
1334*c0909341SAndroid Build Coastguard Worker    vld           vr3,       a3,       (REST_UNIT_STRIDE-1)*2     // down-left
1335*c0909341SAndroid Build Coastguard Worker    vld           vr4,       a3,       (1-REST_UNIT_STRIDE)*2     // up-right
1336*c0909341SAndroid Build Coastguard Worker    vld           vr5,       a3,       (1+REST_UNIT_STRIDE)*2     // down-right
1337*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr6,       vr0,      vr1      // up + down, even / odd, widened to 32 bits
1338*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr7,       vr0,      vr1
1339*c0909341SAndroid Build Coastguard Worker    vmul.w        vr6,       vr6,      vr10     // * 6
1340*c0909341SAndroid Build Coastguard Worker    vmul.w        vr7,       vr7,      vr10
1341*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr8,       vr2,      vr3
1342*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr9,       vr2,      vr3
1343*c0909341SAndroid Build Coastguard Worker    vaddwev.w.h   vr12,      vr4,      vr5
1344*c0909341SAndroid Build Coastguard Worker    vaddwod.w.h   vr13,      vr4,      vr5
1345*c0909341SAndroid Build Coastguard Worker    vadd.w        vr8,       vr8,      vr12     // sum of the four diagonals
1346*c0909341SAndroid Build Coastguard Worker    vadd.w        vr9,       vr9,      vr13
1347*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,       vr8,      vr11     // += diagonals * 5
1348*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,       vr9,      vr11
1349*c0909341SAndroid Build Coastguard Worker    vilvl.w       vr18,      vr7,      vr6      // a for elements 0 1 2 3
1350*c0909341SAndroid Build Coastguard Worker    vilvh.w       vr19,      vr7,      vr6      // a for elements 4 5 6 7
1351*c0909341SAndroid Build Coastguard Worker    // b
1352*c0909341SAndroid Build Coastguard Worker    vld           vr0,       a2,       -REST_UNIT_STRIDE*4
1353*c0909341SAndroid Build Coastguard Worker    vld           vr1,       a2,       -REST_UNIT_STRIDE*4+16
1354*c0909341SAndroid Build Coastguard Worker    vld           vr2,       a2,       REST_UNIT_STRIDE*4
1355*c0909341SAndroid Build Coastguard Worker    vld           vr3,       a2,       REST_UNIT_STRIDE*4+16
1356*c0909341SAndroid Build Coastguard Worker    vld           vr4,       a2,       (-REST_UNIT_STRIDE-1)*4
1357*c0909341SAndroid Build Coastguard Worker    vld           vr5,       a2,       (-REST_UNIT_STRIDE-1)*4+16
1358*c0909341SAndroid Build Coastguard Worker    vld           vr8,       a2,       (REST_UNIT_STRIDE-1)*4
1359*c0909341SAndroid Build Coastguard Worker    vld           vr9,       a2,       (REST_UNIT_STRIDE-1)*4+16
1360*c0909341SAndroid Build Coastguard Worker    vld           vr12,      a2,       (1-REST_UNIT_STRIDE)*4
1361*c0909341SAndroid Build Coastguard Worker    vld           vr13,      a2,       (1-REST_UNIT_STRIDE)*4+16
1362*c0909341SAndroid Build Coastguard Worker    vld           vr14,      a2,       (1+REST_UNIT_STRIDE)*4
1363*c0909341SAndroid Build Coastguard Worker    vld           vr15,      a2,       (1+REST_UNIT_STRIDE)*4+16
1364*c0909341SAndroid Build Coastguard Worker    vadd.w        vr0,       vr0,      vr2  // up + down, elements 0 1 2 3
1365*c0909341SAndroid Build Coastguard Worker    vadd.w        vr1,       vr1,      vr3  // up + down, elements 4 5 6 7
1366*c0909341SAndroid Build Coastguard Worker    vmul.w        vr20,      vr0,      vr10 // * 6
1367*c0909341SAndroid Build Coastguard Worker    vmul.w        vr21,      vr1,      vr10
1368*c0909341SAndroid Build Coastguard Worker    vadd.w        vr4,       vr4,      vr8  // left diagonals, elements 0 1 2 3
1369*c0909341SAndroid Build Coastguard Worker    vadd.w        vr5,       vr5,      vr9  // left diagonals, elements 4 5 6 7
1370*c0909341SAndroid Build Coastguard Worker    vadd.w        vr12,      vr12,     vr14 // right diagonals
1371*c0909341SAndroid Build Coastguard Worker    vadd.w        vr13,      vr13,     vr15
1372*c0909341SAndroid Build Coastguard Worker    vadd.w        vr12,      vr12,     vr4  // sum of the four diagonals
1373*c0909341SAndroid Build Coastguard Worker    vadd.w        vr13,      vr13,     vr5
1374*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr20,      vr12,     vr11 // b += diagonals * 5
1375*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr21,      vr13,     vr11
1376*c0909341SAndroid Build Coastguard Worker    vld           vr2,       a1,       0    // source pixels
1377*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,       vr2,      0    // zero-extend 8 bytes to 16 bits
1378*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr3,       vr2,      0    // src 0 1 2 3 as 32-bit
1379*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr4,       vr2            // src 4 5 6 7 as 32-bit
1380*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr20,      vr18,     vr3  // b + a * src
1381*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr21,      vr19,     vr4
1382*c0909341SAndroid Build Coastguard Worker    vssrlrni.h.w  vr21,      vr20,     9    // rounding shift right by 9, saturate and narrow to 16 bits
1383*c0909341SAndroid Build Coastguard Worker    vst           vr21,      a0,       0
1384*c0909341SAndroid Build Coastguard Worker    addi.d        a3,        a3,       16   // advance every cursor by 8 elements
1385*c0909341SAndroid Build Coastguard Worker    addi.d        a2,        a2,       32
1386*c0909341SAndroid Build Coastguard Worker    addi.d        a1,        a1,       8
1387*c0909341SAndroid Build Coastguard Worker    addi.d        a0,        a0,       16
1388*c0909341SAndroid Build Coastguard Worker    addi.w        a4,        a4,       -8
1389*c0909341SAndroid Build Coastguard Worker    blt           zero,      a4,       .LBS5SGF_V_W1  // loop while columns remain
1390*c0909341SAndroid Build Coastguard Worker.LBS5SGF_END:
1391*c0909341SAndroid Build Coastguard Workerendfunc
1392*c0909341SAndroid Build Coastguard Worker
1393*c0909341SAndroid Build Coastguard Worker/*
1394*c0909341SAndroid Build Coastguard Workervoid dav1d_sgr_mix_finish_lsx(uint8_t *p, const ptrdiff_t stride,
1395*c0909341SAndroid Build Coastguard Worker                              const int16_t *dst0, const int16_t *dst1,
1396*c0909341SAndroid Build Coastguard Worker                              const int w0, const int w1,
1397*c0909341SAndroid Build Coastguard Worker                              const int w, const int h);
1398*c0909341SAndroid Build Coastguard Worker*/
1399*c0909341SAndroid Build Coastguard Workerfunction sgr_mix_finish_8bpc_lsx
    // Blends two 16-bit filter outputs into the 8-bit picture, in place.
    //
    // Register use, following the prototype comment above this function:
    //   a0 = p, a1 = stride, a2 = dst0, a3 = dst1,
    //   a4 = w0, a5 = w1, a6 = w, a7 = h.
    //
    // Per pixel, with u = p << 4:
    //   v = (u << 7) + w0 * (dst0 - u) + w1 * (dst1 - u)
    //   p = sat_u8(sat_u16((v + (1 << 10)) >> 11))
    //
    // Each row is processed as (w & ~7) pixels in whole 8-pixel vectors,
    // followed by w & 7 pixels computed as one more vector and stored byte
    // by byte. Rows with w < 8 take the separate .LSGRMIX_REM path. Every
    // vld fetches a full 16 bytes even where fewer elements are consumed.
    // NOTE(review): both byte-store loops are do-while style, so w == 0
    // would not terminate promptly; presumably callers never pass w == 0.
1400*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr3,     a4            // w0 in every 32-bit lane
1401*c0909341SAndroid Build Coastguard Worker    vreplgr2vr.w  vr13,    a5            // w1 in every 32-bit lane
1402*c0909341SAndroid Build Coastguard Worker    andi          t4,      a6,       0x7 // t4 = w & 7 (leftover pixels per row)
1403*c0909341SAndroid Build Coastguard Worker    sub.w         t5,      a6,       t4  // t5 = w rounded down to a multiple of 8
1404*c0909341SAndroid Build Coastguard Worker
1405*c0909341SAndroid Build Coastguard Worker    beq           zero,    t5,      .LSGRMIX_REM   // w < 8: only the leftover path applies
1406*c0909341SAndroid Build Coastguard Worker
1407*c0909341SAndroid Build Coastguard Worker.LSGRMIX_H:
1408*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      a0,       0   // p cursor
1409*c0909341SAndroid Build Coastguard Worker    addi.d        t1,      a2,       0   // dst0
1410*c0909341SAndroid Build Coastguard Worker    addi.d        t3,      a3,       0   // dst1
1411*c0909341SAndroid Build Coastguard Worker    addi.w        t2,      t5,       0   // pixels left in whole vectors
1412*c0909341SAndroid Build Coastguard Worker    andi          t4,      a6,       0x7 // reload w & 7: the byte-store loop below counts t4 down to zero
1413*c0909341SAndroid Build Coastguard Worker.LSGRMIX_W:
1414*c0909341SAndroid Build Coastguard Worker    vld           vr0,     t0,       0   // pixels
1415*c0909341SAndroid Build Coastguard Worker    vld           vr1,     t1,       0   // 8 x dst0
1416*c0909341SAndroid Build Coastguard Worker    vld           vr10,    t3,       0   // 8 x dst1
1417*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,     vr0,      4   // u = p << 4, low 8 bytes widened to 16 bits
1418*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr4,     vr2,      0   // u 0 1 2 3
1419*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr5,     vr2           // u 4 5 6 7
1420*c0909341SAndroid Build Coastguard Worker    vslli.w       vr6,     vr4,      7   // accumulator starts at u << 7
1421*c0909341SAndroid Build Coastguard Worker    vslli.w       vr7,     vr5,      7
1422*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr8,     vr1,      0   // dst0 0 1 2 3, sign-extended
1423*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr9,     vr1           // dst0 4 5 6 7, sign-extended
1424*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,     vr8,      vr4 // dst0 - u
1425*c0909341SAndroid Build Coastguard Worker    vsub.w        vr9,     vr9,      vr5
1426*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr8,      vr3  // v 0 - 3 += w0 * (dst0 - u)
1427*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr9,      vr3  // v 4 - 7
1428*c0909341SAndroid Build Coastguard Worker
1429*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr11,    vr10,     0    // dst1 0 1 2 3, sign-extended
1430*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr12,    vr10           // dst1 4 5 6 7, sign-extended
1431*c0909341SAndroid Build Coastguard Worker    vsub.w        vr11,    vr11,     vr4  // dst1 - u
1432*c0909341SAndroid Build Coastguard Worker    vsub.w        vr12,    vr12,     vr5
1433*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr11,     vr13 // v += w1 * (dst1 - u)
1434*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr12,     vr13
1435*c0909341SAndroid Build Coastguard Worker
1436*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr7,     vr6,      11   // rounding arithmetic shift right by 11, saturate to unsigned 16 bits
1437*c0909341SAndroid Build Coastguard Worker    vssrlni.bu.h  vr7,     vr7,      0    // saturate to unsigned 8 bits
1438*c0909341SAndroid Build Coastguard Worker    vstelm.d      vr7,     t0,       0,    0   // store 8 result pixels
1439*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      t0,       8
1440*c0909341SAndroid Build Coastguard Worker    addi.d        t1,      t1,       16
1441*c0909341SAndroid Build Coastguard Worker    addi.d        t3,      t3,       16
1442*c0909341SAndroid Build Coastguard Worker    addi.d        t2,      t2,       -8
1443*c0909341SAndroid Build Coastguard Worker    bne           zero,    t2,       .LSGRMIX_W   // t2 is a multiple of 8, so it reaches exactly zero
1444*c0909341SAndroid Build Coastguard Worker
1445*c0909341SAndroid Build Coastguard Worker    beq           t4,      zero,     .LSGRMIX_W8  // no leftover pixels in this row
1446*c0909341SAndroid Build Coastguard Worker
    // Leftover pixels of the row: same arithmetic as .LSGRMIX_W on one more
    // vector, but only t4 bytes of the result are stored.
1447*c0909341SAndroid Build Coastguard Worker    vld           vr0,     t0,       0
1448*c0909341SAndroid Build Coastguard Worker    vld           vr1,     t1,       0
1449*c0909341SAndroid Build Coastguard Worker    vld           vr10,    t3,       0
1450*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,     vr0,      4   // u = p << 4, widened to 16 bits
1451*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr4,     vr2,      0   // u 0 1 2 3
1452*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr5,     vr2           // u 4 5 6 7
1453*c0909341SAndroid Build Coastguard Worker    vslli.w       vr6,     vr4,      7
1454*c0909341SAndroid Build Coastguard Worker    vslli.w       vr7,     vr5,      7
1455*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr8,     vr1,      0   // dst0
1456*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr9,     vr1           // dst0
1457*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,     vr8,      vr4
1458*c0909341SAndroid Build Coastguard Worker    vsub.w        vr9,     vr9,      vr5
1459*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr8,      vr3  // v 0 - 3
1460*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr9,      vr3  // v 4 - 7
1461*c0909341SAndroid Build Coastguard Worker
1462*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr11,    vr10,     0    // dst1
1463*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr12,    vr10           // dst1
1464*c0909341SAndroid Build Coastguard Worker    vsub.w        vr11,    vr11,     vr4
1465*c0909341SAndroid Build Coastguard Worker    vsub.w        vr12,    vr12,     vr5
1466*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr11,     vr13
1467*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr12,     vr13
1468*c0909341SAndroid Build Coastguard Worker
1469*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr7,     vr6,      11
1470*c0909341SAndroid Build Coastguard Worker    vssrlni.bu.h  vr7,     vr7,      0
1471*c0909341SAndroid Build Coastguard Worker
1472*c0909341SAndroid Build Coastguard Worker.LSGRMIX_ST:
1473*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr7,     t0,       0,    0   // store the lowest byte
1474*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      t0,       1
1475*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr7,     vr7,      1         // shift the next result byte into position 0
1476*c0909341SAndroid Build Coastguard Worker    addi.w        t4,      t4,       -1
1477*c0909341SAndroid Build Coastguard Worker    bnez          t4,      .LSGRMIX_ST
1478*c0909341SAndroid Build Coastguard Worker
1479*c0909341SAndroid Build Coastguard Worker.LSGRMIX_W8:
1480*c0909341SAndroid Build Coastguard Worker    addi.w        a7,      a7,       -1        // one row done
1481*c0909341SAndroid Build Coastguard Worker    add.d         a0,      a0,       a1        // p += stride
1482*c0909341SAndroid Build Coastguard Worker    addi.d        a2,      a2,       (FILTER_OUT_STRIDE<<1)   // next dst0 row (16-bit entries)
1483*c0909341SAndroid Build Coastguard Worker    addi.d        a3,      a3,       (FILTER_OUT_STRIDE<<1)   // next dst1 row
1484*c0909341SAndroid Build Coastguard Worker    bnez          a7,      .LSGRMIX_H
1485*c0909341SAndroid Build Coastguard Worker    b             .LSGR_MIX_END
1486*c0909341SAndroid Build Coastguard Worker
1487*c0909341SAndroid Build Coastguard Worker.LSGRMIX_REM:
    // Narrow case (w < 8): one vector per row, then w & 7 byte stores.
1488*c0909341SAndroid Build Coastguard Worker    andi          t4,      a6,       0x7 // bytes to store in this row
1489*c0909341SAndroid Build Coastguard Worker    vld           vr0,     a0,       0
1490*c0909341SAndroid Build Coastguard Worker    vld           vr1,     a2,       0
1491*c0909341SAndroid Build Coastguard Worker    vld           vr10,    a3,       0
1492*c0909341SAndroid Build Coastguard Worker    vsllwil.hu.bu vr2,     vr0,      4   // u = p << 4, widened to 16 bits
1493*c0909341SAndroid Build Coastguard Worker    vsllwil.wu.hu vr4,     vr2,      0   // u 0 1 2 3
1494*c0909341SAndroid Build Coastguard Worker    vexth.wu.hu   vr5,     vr2           // u 4 5 6 7
1495*c0909341SAndroid Build Coastguard Worker    vslli.w       vr6,     vr4,      7
1496*c0909341SAndroid Build Coastguard Worker    vslli.w       vr7,     vr5,      7
1497*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr8,     vr1,      0   // dst0
1498*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr9,     vr1           // dst0
1499*c0909341SAndroid Build Coastguard Worker    vsub.w        vr8,     vr8,      vr4
1500*c0909341SAndroid Build Coastguard Worker    vsub.w        vr9,     vr9,      vr5
1501*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr8,      vr3  // v 0 - 3
1502*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr9,      vr3  // v 4 - 7
1503*c0909341SAndroid Build Coastguard Worker
1504*c0909341SAndroid Build Coastguard Worker    vsllwil.w.h   vr11,    vr10,     0    // dst1
1505*c0909341SAndroid Build Coastguard Worker    vexth.w.h     vr12,    vr10           // dst1
1506*c0909341SAndroid Build Coastguard Worker    vsub.w        vr11,    vr11,     vr4
1507*c0909341SAndroid Build Coastguard Worker    vsub.w        vr12,    vr12,     vr5
1508*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr6,     vr11,     vr13
1509*c0909341SAndroid Build Coastguard Worker    vmadd.w       vr7,     vr12,     vr13
1510*c0909341SAndroid Build Coastguard Worker
1511*c0909341SAndroid Build Coastguard Worker    vssrarni.hu.w vr7,     vr6,      11
1512*c0909341SAndroid Build Coastguard Worker    vssrlni.bu.h  vr7,     vr7,      0
1513*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      a0,       0    // byte-store cursor
1514*c0909341SAndroid Build Coastguard Worker.LSGRMIX_REM_ST:
1515*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr7,     t0,       0,    0   // store the lowest byte
1516*c0909341SAndroid Build Coastguard Worker    addi.d        t0,      t0,       1
1517*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr7,     vr7,      1         // shift the next result byte into position 0
1518*c0909341SAndroid Build Coastguard Worker    addi.w        t4,      t4,       -1
1519*c0909341SAndroid Build Coastguard Worker    bnez          t4,      .LSGRMIX_REM_ST
1520*c0909341SAndroid Build Coastguard Worker
1521*c0909341SAndroid Build Coastguard Worker    addi.w        a7,      a7,       -1        // one row done
1522*c0909341SAndroid Build Coastguard Worker    add.d         a0,      a0,       a1        // p += stride
1523*c0909341SAndroid Build Coastguard Worker    addi.d        a2,      a2,       (FILTER_OUT_STRIDE<<1)
1524*c0909341SAndroid Build Coastguard Worker    addi.d        a3,      a3,       (FILTER_OUT_STRIDE<<1)
1525*c0909341SAndroid Build Coastguard Worker    bnez          a7,      .LSGRMIX_REM
1526*c0909341SAndroid Build Coastguard Worker
1527*c0909341SAndroid Build Coastguard Worker.LSGR_MIX_END:
1528*c0909341SAndroid Build Coastguard Workerendfunc
1529*c0909341SAndroid Build Coastguard Worker
// MADD_HU_BU_LASX in0, in1, out0, out1
// Zero-extends the bytes of \in0 to halfwords and multiply-accumulates them
// with the halfword vector \in1 into two accumulators:
//   \out0 += widen(low  8 bytes of each 128-bit half of \in0) * \in1
//   \out1 += widen(high 8 bytes of each 128-bit half of \in0) * \in1
// xr12 and xr13 are used as scratch and are overwritten on every invocation.
1530*c0909341SAndroid Build Coastguard Worker.macro MADD_HU_BU_LASX in0, in1, out0, out1
1531*c0909341SAndroid Build Coastguard Worker    xvsllwil.hu.bu xr12,     \in0,     0
1532*c0909341SAndroid Build Coastguard Worker    xvexth.hu.bu   xr13,     \in0
1533*c0909341SAndroid Build Coastguard Worker    xvmadd.h       \out0,    xr12,     \in1
1534*c0909341SAndroid Build Coastguard Worker    xvmadd.h       \out1,    xr13,     \in1
1535*c0909341SAndroid Build Coastguard Worker.endm
1536*c0909341SAndroid Build Coastguard Worker
// Byte-shuffle control table loaded by wiener_filter_h_8bpc_lasx: indices
// 3..18, repeated for both 128-bit halves of a 256-bit register. That function
// derives the controls for its other taps by subtracting 1..2 from, or adding
// 1..3 to, every byte of this table.
// NOTE(review): indices >= 16 presumably select from the second source
// register of xvshuf.b - confirm against the LASX manual.
1537*c0909341SAndroid Build Coastguard Workerconst wiener_shuf_lasx
1538*c0909341SAndroid Build Coastguard Worker.byte 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
1539*c0909341SAndroid Build Coastguard Worker.byte 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
1540*c0909341SAndroid Build Coastguard Workerendconst
1541*c0909341SAndroid Build Coastguard Worker
// wiener_filter_h_8bpc_lasx: horizontal 7-tap Wiener pass, 8 bpc, LASX.
// Register usage as seen in the body:
//   a0  output pointer ("hor_ptr"); one 32-bit word is stored per column
//   a1  input pointer ("tmp_ptr") to 8-bit samples
//   a2  pointer to the filter coefficients, read as halfwords (0..6 are used)
//   a3  w (columns), a4  h (rows)
// For every column the code computes
//   (px[3] << 7) + sum(coef[k] * px[k], k = 0..6) + (1 << 14),
// applies a rounding arithmetic shift right by 3 and clamps the result with
// xr3 = 0 and xr1 = (1 << 13) - 1. The tap sum is accumulated in 16-bit lanes
// and only then sign-extended to 32 bits.
// The inner loop always handles 32 columns per iteration (reading 48 input
// bytes and writing 128 output bytes) and repeats while the remaining count is
// positive, so a trailing partial chunk is still processed at full width.
1542*c0909341SAndroid Build Coastguard Workerfunction wiener_filter_h_8bpc_lasx
    // Spill f24-f28 so the vector registers aliasing them can be used below.
    // NOTE(review): the frame is 40 bytes, which is not a multiple of 16, and
    // xr28 is not referenced in this function - confirm both are intended.
1543*c0909341SAndroid Build Coastguard Worker    addi.d         sp,       sp,       -40
1544*c0909341SAndroid Build Coastguard Worker    fst.d          f24,      sp,       0
1545*c0909341SAndroid Build Coastguard Worker    fst.d          f25,      sp,       8
1546*c0909341SAndroid Build Coastguard Worker    fst.d          f26,      sp,       16
1547*c0909341SAndroid Build Coastguard Worker    fst.d          f27,      sp,       24
1548*c0909341SAndroid Build Coastguard Worker    fst.d          f28,      sp,       32
1549*c0909341SAndroid Build Coastguard Worker    li.w           t7,       1<<14          // clip_limit
1550*c0909341SAndroid Build Coastguard Worker
    // xr4 = shuffle control (indices 3..18); xr21..xr27 = coefficient 0..6,
    // each broadcast to every halfword; xr0 = 1 << 14 in every word.
1551*c0909341SAndroid Build Coastguard Worker    la.local       t1,       wiener_shuf_lasx
1552*c0909341SAndroid Build Coastguard Worker    xvld           xr4,      t1,       0
1553*c0909341SAndroid Build Coastguard Worker    vld            vr27,     a2,       0    // filter[0][k]
    // Copy the 128-bit coefficient vector into both halves of xr14.
1554*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr14,     xr27,     0b00000000
1555*c0909341SAndroid Build Coastguard Worker    xvrepl128vei.h xr21,     xr14,     0
1556*c0909341SAndroid Build Coastguard Worker    xvrepl128vei.h xr22,     xr14,     1
1557*c0909341SAndroid Build Coastguard Worker    xvrepl128vei.h xr23,     xr14,     2
1558*c0909341SAndroid Build Coastguard Worker    xvrepl128vei.h xr24,     xr14,     3
1559*c0909341SAndroid Build Coastguard Worker    xvrepl128vei.h xr25,     xr14,     4
1560*c0909341SAndroid Build Coastguard Worker    xvrepl128vei.h xr26,     xr14,     5
1561*c0909341SAndroid Build Coastguard Worker    xvrepl128vei.h xr27,     xr14,     6
1562*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w  xr0,      t7
1563*c0909341SAndroid Build Coastguard Worker
    // Row loop: a4 counts the remaining rows; t0/t1/t2 are the per-row column
    // counter, input cursor and output cursor.
1564*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_H_H_LASX:
1565*c0909341SAndroid Build Coastguard Worker    addi.w         a4,       a4,       -1    // h
1566*c0909341SAndroid Build Coastguard Worker    addi.w         t0,       a3,       0     // w
1567*c0909341SAndroid Build Coastguard Worker    addi.d         t1,       a1,       0     // tmp_ptr
1568*c0909341SAndroid Build Coastguard Worker    addi.d         t2,       a0,       0     // hor_ptr
1569*c0909341SAndroid Build Coastguard Worker
    // Column loop: 32 columns per iteration.
1570*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_H_W_LASX:
1571*c0909341SAndroid Build Coastguard Worker    addi.w         t0,       t0,       -32
    // xr5 = input bytes [0, 32), xr13 = input bytes [16, 48) from the cursor.
1572*c0909341SAndroid Build Coastguard Worker    xvld           xr5,      t1,       0
1573*c0909341SAndroid Build Coastguard Worker    xvld           xr13,     t1,       16
1574*c0909341SAndroid Build Coastguard Worker
    // Build the six shifted views xr6..xr11 (byte offsets 1..6); xr5 itself is
    // the offset-0 view and xr8 (offset 3) is the center tap. xr14/xr15 only
    // hold temporary shuffle controls here.
1575*c0909341SAndroid Build Coastguard Worker    xvsubi.bu      xr14,     xr4,      2
1576*c0909341SAndroid Build Coastguard Worker    xvsubi.bu      xr15,     xr4,      1
1577*c0909341SAndroid Build Coastguard Worker    xvshuf.b       xr6,      xr13,     xr5,     xr14  // 1 ... 8, 9 ... 16
1578*c0909341SAndroid Build Coastguard Worker    xvshuf.b       xr7,      xr13,     xr5,     xr15  // 2 ... 9, 10 ... 17
1579*c0909341SAndroid Build Coastguard Worker    xvshuf.b       xr8,      xr13,     xr5,     xr4   // 3 ... 10, 11 ... 18
1580*c0909341SAndroid Build Coastguard Worker    xvaddi.bu      xr14,     xr4,      1
1581*c0909341SAndroid Build Coastguard Worker    xvaddi.bu      xr15,     xr4,      2
1582*c0909341SAndroid Build Coastguard Worker    xvshuf.b       xr9,      xr13,     xr5,     xr14  // 4 ... 11, 12 ... 19
1583*c0909341SAndroid Build Coastguard Worker    xvshuf.b       xr10,     xr13,     xr5,     xr15  // 5 ... 12, 13 ... 20
1584*c0909341SAndroid Build Coastguard Worker    xvaddi.bu      xr14,     xr4,      3
1585*c0909341SAndroid Build Coastguard Worker    xvshuf.b       xr11,     xr13,     xr5,     xr14  // 6 ... 13, 14 ... 21
1586*c0909341SAndroid Build Coastguard Worker
    // Widen the center samples to 32 bits and scale them by 1 << 7
    // (xr17..xr20); the xvexth results are shifted separately below.
1587*c0909341SAndroid Build Coastguard Worker    xvsllwil.hu.bu xr15,     xr8,      0    //  3  4  5  6  7  8  9 10
1588*c0909341SAndroid Build Coastguard Worker    xvexth.hu.bu   xr16,     xr8            // 11 12 13 14 15 16 17 18
1589*c0909341SAndroid Build Coastguard Worker    xvsllwil.wu.hu xr17,     xr15,     7    //  3  4  5  6
1590*c0909341SAndroid Build Coastguard Worker    xvexth.wu.hu   xr18,     xr15           //  7  8  9 10
1591*c0909341SAndroid Build Coastguard Worker    xvsllwil.wu.hu xr19,     xr16,     7    // 11 12 13 14
1592*c0909341SAndroid Build Coastguard Worker    xvexth.wu.hu   xr20,     xr16           // 15 16 17 18
1593*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr18,     xr18,     7
1594*c0909341SAndroid Build Coastguard Worker    xvslli.w       xr20,     xr20,     7
    // Clear the two halfword accumulators used by MADD_HU_BU_LASX.
1595*c0909341SAndroid Build Coastguard Worker    xvxor.v        xr15,     xr15,     xr15
1596*c0909341SAndroid Build Coastguard Worker    xvxor.v        xr14,     xr14,     xr14
1597*c0909341SAndroid Build Coastguard Worker
    // Accumulate the seven taps: xr14 takes the low 8 bytes of each 128-bit
    // half, xr15 the high 8 bytes.
1598*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU_LASX xr5,  xr21, xr14, xr15
1599*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU_LASX xr6,  xr22, xr14, xr15
1600*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU_LASX xr7,  xr23, xr14, xr15
1601*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU_LASX xr8,  xr24, xr14, xr15
1602*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU_LASX xr9,  xr25, xr14, xr15
1603*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU_LASX xr10, xr26, xr14, xr15
1604*c0909341SAndroid Build Coastguard Worker    MADD_HU_BU_LASX xr11, xr27, xr14, xr15
1605*c0909341SAndroid Build Coastguard Worker
    // Sign-extend the 16-bit tap sums to 32 bits, then add them and the
    // 1 << 14 bias (xr0) to the scaled center samples.
1606*c0909341SAndroid Build Coastguard Worker    xvsllwil.w.h   xr5,      xr14,     0   //  0  1  2  3
1607*c0909341SAndroid Build Coastguard Worker    xvexth.w.h     xr6,      xr14          //  4  5  6  7
1608*c0909341SAndroid Build Coastguard Worker    xvsllwil.w.h   xr7,      xr15,     0   //  8  9 10 11
1609*c0909341SAndroid Build Coastguard Worker    xvexth.w.h     xr8,      xr15          // 12 13 14 15
1610*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr17,     xr17,     xr5
1611*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr18,     xr18,     xr6
1612*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr19,     xr19,     xr7
1613*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr20,     xr20,     xr8
1614*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr17,     xr17,     xr0
1615*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr18,     xr18,     xr0
1616*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr19,     xr19,     xr0
1617*c0909341SAndroid Build Coastguard Worker    xvadd.w        xr20,     xr20,     xr0
1618*c0909341SAndroid Build Coastguard Worker
    // xr1 = (1 << 13) - 1 and xr3 = 0 are the clamp bounds; both are
    // recomputed on every iteration although they do not change. Round and
    // shift right by 3, then clamp (xvclip.w is defined outside this chunk).
1619*c0909341SAndroid Build Coastguard Worker    xvsrli.w       xr1,      xr0,      1
1620*c0909341SAndroid Build Coastguard Worker    xvsubi.wu      xr1,      xr1,      1
1621*c0909341SAndroid Build Coastguard Worker    xvxor.v        xr3,      xr3,      xr3
1622*c0909341SAndroid Build Coastguard Worker    xvsrari.w      xr17,     xr17,     3
1623*c0909341SAndroid Build Coastguard Worker    xvsrari.w      xr18,     xr18,     3
1624*c0909341SAndroid Build Coastguard Worker    xvsrari.w      xr19,     xr19,     3
1625*c0909341SAndroid Build Coastguard Worker    xvsrari.w      xr20,     xr20,     3
1626*c0909341SAndroid Build Coastguard Worker    xvclip.w       xr17,     xr17,     xr3,     xr1
1627*c0909341SAndroid Build Coastguard Worker    xvclip.w       xr18,     xr18,     xr3,     xr1
1628*c0909341SAndroid Build Coastguard Worker    xvclip.w       xr19,     xr19,     xr3,     xr1
1629*c0909341SAndroid Build Coastguard Worker    xvclip.w       xr20,     xr20,     xr3,     xr1
1630*c0909341SAndroid Build Coastguard Worker
    // Regroup the 128-bit halves of xr17..xr20 before storing; xr5/xr6 keep
    // copies of xr17/xr19 because the first two permutes overwrite them.
    // NOTE(review): the intent appears to be column-ordered output (columns
    // 0-7, 8-15, 16-23, 24-31 at offsets 0, 32, 64, 96) - confirm against the
    // xvpermi.q selector encoding.
1631*c0909341SAndroid Build Coastguard Worker    xvor.v         xr5,      xr17,     xr17
1632*c0909341SAndroid Build Coastguard Worker    xvor.v         xr6,      xr19,     xr19
1633*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr17,     xr18,     0b00000010
1634*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr19,     xr20,     0b00000010
1635*c0909341SAndroid Build Coastguard Worker
1636*c0909341SAndroid Build Coastguard Worker    xvst           xr17,     t2,       0
1637*c0909341SAndroid Build Coastguard Worker    xvst           xr19,     t2,       32
1638*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr18,     xr5,      0b00110001
1639*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr20,     xr6,      0b00110001
1640*c0909341SAndroid Build Coastguard Worker    xvst           xr18,     t2,       64
1641*c0909341SAndroid Build Coastguard Worker    xvst           xr20,     t2,       96
    // Advance by 32 input bytes / 32 output words; loop while columns remain.
1642*c0909341SAndroid Build Coastguard Worker    addi.d         t1,       t1,       32
1643*c0909341SAndroid Build Coastguard Worker    addi.d         t2,       t2,       128
1644*c0909341SAndroid Build Coastguard Worker    blt            zero,     t0,       .WIENER_FILTER_H_W_LASX
1645*c0909341SAndroid Build Coastguard Worker
    // Next row: the input advances REST_UNIT_STRIDE bytes, the output
    // REST_UNIT_STRIDE 32-bit words.
1646*c0909341SAndroid Build Coastguard Worker    addi.d         a1,       a1,       REST_UNIT_STRIDE
1647*c0909341SAndroid Build Coastguard Worker    addi.d         a0,       a0,       (REST_UNIT_STRIDE << 2)
1648*c0909341SAndroid Build Coastguard Worker    bnez           a4,       .WIENER_FILTER_H_H_LASX
1649*c0909341SAndroid Build Coastguard Worker
    // Restore the spilled registers and release the frame.
1650*c0909341SAndroid Build Coastguard Worker    fld.d          f24,      sp,       0
1651*c0909341SAndroid Build Coastguard Worker    fld.d          f25,      sp,       8
1652*c0909341SAndroid Build Coastguard Worker    fld.d          f26,      sp,       16
1653*c0909341SAndroid Build Coastguard Worker    fld.d          f27,      sp,       24
1654*c0909341SAndroid Build Coastguard Worker    fld.d          f28,      sp,       32
1655*c0909341SAndroid Build Coastguard Worker    addi.d         sp,       sp,       40
1656*c0909341SAndroid Build Coastguard Workerendfunc
1657*c0909341SAndroid Build Coastguard Worker
// APPLY_FILTER_LASX in0, in1, in2
// One vertical tap: t7 = (\in0 << 2) + \in1, then 16 consecutive 32-bit words
// are loaded from t7 and multiply-accumulated with \in2:
//   xr14 += words[0..7] * \in2,  xr16 += words[8..15] * \in2
// t7 is left holding the computed address, so a following invocation can pass
// t7 as \in1 to step on by \in0 words. Clobbers t7, xr10 and xr12.
1658*c0909341SAndroid Build Coastguard Worker.macro APPLY_FILTER_LASX in0, in1, in2
1659*c0909341SAndroid Build Coastguard Worker    alsl.d         t7,       \in0,     \in1,    2
1660*c0909341SAndroid Build Coastguard Worker    xvld           xr10,     t7,       0
1661*c0909341SAndroid Build Coastguard Worker    xvld           xr12,     t7,       32
1662*c0909341SAndroid Build Coastguard Worker    xvmadd.w       xr14,     xr10,     \in2
1663*c0909341SAndroid Build Coastguard Worker    xvmadd.w       xr16,     xr12,     \in2
1664*c0909341SAndroid Build Coastguard Worker.endm
1665*c0909341SAndroid Build Coastguard Worker
// wiener_filter_v_8bpc_core_lasx
// Computes one 16-column group of the vertical 7-tap Wiener pass.
// Expects (set up by the surrounding function):
//   a2 = base of the 32-bit intermediate buffer, t2 = row j, t4 = column i,
//   t6 = initial accumulator value, t8 = row stride in elements,
//   xr2..xr8 = the seven coefficients, each broadcast to every word.
// The accumulators start at t6, sum coef[k] * row(j + k) for k = 0..6, and are
// then narrowed with a rounding shift right by 11 and saturation, first to
// unsigned halfwords and then to unsigned bytes.
// Result: xr17; callers store its low 128 bits (vr17) as 16 output bytes.
// Clobbers t7, xr10, xr12, xr14, xr16 and xr17.
1666*c0909341SAndroid Build Coastguard Worker.macro wiener_filter_v_8bpc_core_lasx
1667*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w  xr14,     t6
1668*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w  xr16,     t6
1669*c0909341SAndroid Build Coastguard Worker
1670*c0909341SAndroid Build Coastguard Worker    addi.w         t7,       t2,       0      // j + index k
1671*c0909341SAndroid Build Coastguard Worker    mul.w          t7,       t7,       t8     // (j + index) * REST_UNIT_STRIDE
1672*c0909341SAndroid Build Coastguard Worker    add.w          t7,       t7,       t4     // (j + index) * REST_UNIT_STRIDE + i
1673*c0909341SAndroid Build Coastguard Worker
    // The first tap turns the element index in t7 into an address relative to
    // a2; every later tap advances that address by one row (t8 elements).
1674*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER_LASX  t7, a2, xr2
1675*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER_LASX  t8, t7, xr3
1676*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER_LASX  t8, t7, xr4
1677*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER_LASX  t8, t7, xr5
1678*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER_LASX  t8, t7, xr6
1679*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER_LASX  t8, t7, xr7
1680*c0909341SAndroid Build Coastguard Worker    APPLY_FILTER_LASX  t8, t7, xr8
    // Narrow 32 -> 16 -> 8 bits with saturation; the xvpermi.d steps reorder
    // 64-bit elements around the narrowing instructions.
    // NOTE(review): presumably this restores column order because the
    // narrowing works per 128-bit half - confirm against the LASX manual.
1681*c0909341SAndroid Build Coastguard Worker    xvssrarni.hu.w xr16,     xr14,      11
1682*c0909341SAndroid Build Coastguard Worker    xvpermi.d      xr17,     xr16,      0b11011000
1683*c0909341SAndroid Build Coastguard Worker    xvssrlni.bu.h  xr17,     xr17,      0
1684*c0909341SAndroid Build Coastguard Worker    xvpermi.d      xr17,     xr17,      0b00001000
1685*c0909341SAndroid Build Coastguard Worker.endm
1686*c0909341SAndroid Build Coastguard Worker
// wiener_filter_v_8bpc_lasx: vertical 7-tap Wiener pass, 8 bpc, LASX.
// Register usage as seen in the body:
//   a0  destination pointer ("p"), written as bytes
//   a1  destination stride in bytes (multiplied by the row index j)
//   a2  base of the 32-bit intermediate buffer (read by the core macro)
//   a3  pointer to the filter coefficients, read as halfwords (7 are loaded)
//   a4  w, a5  h
// Rows are processed one at a time. Full 16-column groups use the LASX core
// macro and a 16-byte store; the remaining w % 16 columns are computed with
// wiener_filter_v_8bpc_core_lsx (defined outside this chunk) and stored one
// byte at a time. Widths below 16 take a separate path that only runs the
// remainder handling.
1687*c0909341SAndroid Build Coastguard Workerfunction wiener_filter_v_8bpc_lasx
    // t6 = initial accumulator value for every output of the core macro.
1688*c0909341SAndroid Build Coastguard Worker    li.w          t6,       -(1 << 18)
1689*c0909341SAndroid Build Coastguard Worker
    // t8 = row stride of the intermediate buffer in elements;
    // xr2..xr8 = coefficients 0..6, each broadcast to every 32-bit word.
1690*c0909341SAndroid Build Coastguard Worker    li.w          t8,       REST_UNIT_STRIDE
1691*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       0
1692*c0909341SAndroid Build Coastguard Worker    ld.h          t1,       a3,       2
1693*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w xr2,      t0
1694*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w xr3,      t1
1695*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       4
1696*c0909341SAndroid Build Coastguard Worker    ld.h          t1,       a3,       6
1697*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w xr4,      t0
1698*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w xr5,      t1
1699*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       8
1700*c0909341SAndroid Build Coastguard Worker    ld.h          t1,       a3,       10
1701*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w xr6,      t0
1702*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w xr7,      t1
1703*c0909341SAndroid Build Coastguard Worker    ld.h          t0,       a3,       12
1704*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w xr8,      t0
1705*c0909341SAndroid Build Coastguard Worker
    // t1 = w % 16, t0 = w rounded down to a multiple of 16, t2 = row j,
    // t4 = column i. With no full 16-column group, take the narrow path.
1706*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf
1707*c0909341SAndroid Build Coastguard Worker    sub.w         t0,       a4,       t1    // w-w%16
1708*c0909341SAndroid Build Coastguard Worker    or            t2,       zero,     zero  // j
1709*c0909341SAndroid Build Coastguard Worker    or            t4,       zero,     zero
1710*c0909341SAndroid Build Coastguard Worker    beqz          t0,       .WIENER_FILTER_V_W_LT16_LASX
1711*c0909341SAndroid Build Coastguard Worker
    // Row loop for w >= 16.
1712*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_H_LASX:
1713*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf
1714*c0909341SAndroid Build Coastguard Worker    add.d         t3,       zero,     a0     // p
1715*c0909341SAndroid Build Coastguard Worker    or            t4,       zero,     zero   // i
1716*c0909341SAndroid Build Coastguard Worker
    // Column loop over the full 16-column groups of row j.
1717*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W_LASX:
1718*c0909341SAndroid Build Coastguard Worker
1719*c0909341SAndroid Build Coastguard Worker    wiener_filter_v_8bpc_core_lasx
1720*c0909341SAndroid Build Coastguard Worker
    // t3 = a0 + j * stride + i is the store address; i is advanced before the
    // store, which still uses the address computed from the old i.
1721*c0909341SAndroid Build Coastguard Worker    mul.w         t5,       t2,       a1   // j * stride
1722*c0909341SAndroid Build Coastguard Worker    add.w         t5,       t5,       t4   // j * stride + i
1723*c0909341SAndroid Build Coastguard Worker    add.d         t3,       a0,       t5
1724*c0909341SAndroid Build Coastguard Worker    addi.w        t4,       t4,       16
1725*c0909341SAndroid Build Coastguard Worker    vst           vr17,     t3,       0
1726*c0909341SAndroid Build Coastguard Worker    bne           t0,       t4,       .WIENER_FILTER_V_W_LASX
1727*c0909341SAndroid Build Coastguard Worker
    // No remainder columns: go straight to the next row.
1728*c0909341SAndroid Build Coastguard Worker    beqz          t1,       .WIENER_FILTER_V_W_EQ16_LASX
1729*c0909341SAndroid Build Coastguard Worker
    // Remainder columns: compute one more group with the LSX core macro.
    // NOTE(review): presumably it reads t2/t4 like the LASX core and leaves
    // its result in vr17 - its definition is outside this chunk.
1730*c0909341SAndroid Build Coastguard Worker    wiener_filter_v_8bpc_core_lsx
1731*c0909341SAndroid Build Coastguard Worker
    // Step past the last full group and reload the remainder count.
1732*c0909341SAndroid Build Coastguard Worker    addi.d        t3,       t3,       16
1733*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf
1734*c0909341SAndroid Build Coastguard Worker
    // Store t1 bytes one by one, shifting vr17 down a byte after each store.
1735*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_ST_REM_LASX:
1736*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr17,     t3,       0,    0
1737*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr17,     vr17,     1
1738*c0909341SAndroid Build Coastguard Worker    addi.d        t3,       t3,       1
1739*c0909341SAndroid Build Coastguard Worker    addi.w        t1,       t1,       -1
1740*c0909341SAndroid Build Coastguard Worker    bnez          t1,       .WIENER_FILTER_V_ST_REM_LASX
1741*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W_EQ16_LASX:
    // Next row; repeat while j < h.
1742*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       t2,       1
1743*c0909341SAndroid Build Coastguard Worker    blt           t2,       a5,       .WIENER_FILTER_V_H_LASX
1744*c0909341SAndroid Build Coastguard Worker    b              .WIENER_FILTER_V_LASX_END
1745*c0909341SAndroid Build Coastguard Worker
    // Row loop for w < 16: t4 stays 0 from the setup above, so only the first
    // column group of each row is computed.
1746*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W_LT16_LASX:
1747*c0909341SAndroid Build Coastguard Worker    andi          t1,       a4,       0xf
1748*c0909341SAndroid Build Coastguard Worker    add.d         t3,       zero,     a0
1749*c0909341SAndroid Build Coastguard Worker
1750*c0909341SAndroid Build Coastguard Worker    wiener_filter_v_8bpc_core_lsx
1751*c0909341SAndroid Build Coastguard Worker
1752*c0909341SAndroid Build Coastguard Worker    mul.w         t5,       t2,       a1   // j * stride
1753*c0909341SAndroid Build Coastguard Worker    add.d         t3,       a0,       t5
1754*c0909341SAndroid Build Coastguard Worker
    // Store the w (< 16) result bytes one at a time.
1755*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_ST_REM_1_LASX:
1756*c0909341SAndroid Build Coastguard Worker    vstelm.b      vr17,     t3,       0,    0
1757*c0909341SAndroid Build Coastguard Worker    vbsrl.v       vr17,     vr17,     1
1758*c0909341SAndroid Build Coastguard Worker    addi.d        t3,       t3,       1
1759*c0909341SAndroid Build Coastguard Worker    addi.w        t1,       t1,       -1
1760*c0909341SAndroid Build Coastguard Worker    bnez          t1,       .WIENER_FILTER_V_ST_REM_1_LASX
1761*c0909341SAndroid Build Coastguard Worker
1762*c0909341SAndroid Build Coastguard Worker    addi.w        t2,       t2,       1
1763*c0909341SAndroid Build Coastguard Worker    blt           t2,       a5,       .WIENER_FILTER_V_W_LT16_LASX
1764*c0909341SAndroid Build Coastguard Worker
1765*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_LASX_END:
1766*c0909341SAndroid Build Coastguard Workerendfunc
1767*c0909341SAndroid Build Coastguard Worker
// boxsum3_sgf_h_8bpc_lasx: turns the box sums AA/BB into self-guided filter
// coefficients, in place, 16 elements per inner iteration (LASX).
// Register usage as seen in the body:
//   a0  AA, 32-bit elements (read as "a", overwritten with the result below)
//   a1  BB, 16-bit elements (read as "b", overwritten with 256 - x)
//   a2  w, a3  h (both are increased by 2 before the loops)
//   a4  s, broadcast to every word of xr6
// Per element:
//   p     = max(a * n - b * b, 0)          (n is the constant held in xr19)
//   z     = min((p * s) rounded >> 20, 255)
//   x     = dav1d_sgr_x_by_x[z]            (byte table)
//   AA[i] = (x * b * 455) rounded >> 12
//   BB[i] = 256 - x
1768*c0909341SAndroid Build Coastguard Workerfunction boxsum3_sgf_h_8bpc_lasx
    // Skip one row plus three elements in both arrays.
1769*c0909341SAndroid Build Coastguard Worker    addi.d         a0,       a0,        (REST_UNIT_STRIDE<<2)+12  // AA
1770*c0909341SAndroid Build Coastguard Worker    //addi.d        a0,       a0,        12   // AA
1771*c0909341SAndroid Build Coastguard Worker    addi.d         a1,       a1,        (REST_UNIT_STRIDE<<1)+6   // BB
1772*c0909341SAndroid Build Coastguard Worker    //addi.d        a1,       a1,        6    // BB
    // Constants: t8 = lookup table base, xr20 = 455, xr22 = 255, xr21 = 256,
    // xr6 = s, xr19 = n.
    // NOTE(review): 0x809 presumably makes xvldi broadcast 9 to every word -
    // confirm against the xvldi immediate encoding.
1773*c0909341SAndroid Build Coastguard Worker    la.local       t8,       dav1d_sgr_x_by_x
1774*c0909341SAndroid Build Coastguard Worker    li.w           t6,       455
1775*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w  xr20,     t6
1776*c0909341SAndroid Build Coastguard Worker    li.w           t6,       255
1777*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w  xr22,     t6
1778*c0909341SAndroid Build Coastguard Worker    xvaddi.wu      xr21,     xr22,      1  // 256
1779*c0909341SAndroid Build Coastguard Worker    xvreplgr2vr.w  xr6,      a4
1780*c0909341SAndroid Build Coastguard Worker    xvldi          xr19,     0x809
1781*c0909341SAndroid Build Coastguard Worker    addi.w         a2,       a2,        2  // w + 2
1782*c0909341SAndroid Build Coastguard Worker    addi.w         a3,       a3,        2  // h + 2
1783*c0909341SAndroid Build Coastguard Worker
    // Row loop: each row starts one element before a0/a1.
1784*c0909341SAndroid Build Coastguard Worker.LBS3SGF_H_H_LASX:
1785*c0909341SAndroid Build Coastguard Worker    addi.w         t2,       a2,        0
1786*c0909341SAndroid Build Coastguard Worker    addi.d         t0,       a0,        -4
1787*c0909341SAndroid Build Coastguard Worker    addi.d         t1,       a1,        -2
1788*c0909341SAndroid Build Coastguard Worker
    // Column loop: 16 elements per iteration, repeated while t2 > 0, so a
    // trailing partial group is still processed at full width.
1789*c0909341SAndroid Build Coastguard Worker.LBS3SGF_H_W_LASX:
1790*c0909341SAndroid Build Coastguard Worker    addi.w         t2,       t2,        -16
1791*c0909341SAndroid Build Coastguard Worker    xvld           xr0,      t0,        0   // AA[i]
1792*c0909341SAndroid Build Coastguard Worker    xvld           xr1,      t0,        32
1793*c0909341SAndroid Build Coastguard Worker    xvld           xr2,      t1,        0   // BB[i]
1794*c0909341SAndroid Build Coastguard Worker
    // xr9/xr10 = the 16 b values sign-extended to 32 bits (low 8 / high 8).
1795*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr4,      xr0,       xr19 // a * n
1796*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr5,      xr1,       xr19
1797*c0909341SAndroid Build Coastguard Worker    vext2xv.w.h    xr9,      xr2
1798*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr10,     xr2,       0b00000001
1799*c0909341SAndroid Build Coastguard Worker    vext2xv.w.h    xr10,     xr10
1800*c0909341SAndroid Build Coastguard Worker    xvmsub.w       xr4,      xr9,       xr9  // p
1801*c0909341SAndroid Build Coastguard Worker    xvmsub.w       xr5,      xr10,      xr10
1802*c0909341SAndroid Build Coastguard Worker    xvmaxi.w       xr4,      xr4,       0
1803*c0909341SAndroid Build Coastguard Worker    xvmaxi.w       xr5,      xr5,       0
1804*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr4,      xr4,       xr6  // p * s
1805*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr5,      xr5,       xr6
1806*c0909341SAndroid Build Coastguard Worker    xvsrlri.w      xr4,      xr4,       20
1807*c0909341SAndroid Build Coastguard Worker    xvsrlri.w      xr5,      xr5,       20
1808*c0909341SAndroid Build Coastguard Worker    xvmin.w        xr4,      xr4,       xr22
1809*c0909341SAndroid Build Coastguard Worker    xvmin.w        xr5,      xr5,       xr22
1810*c0909341SAndroid Build Coastguard Worker
    // Table lookup for the first 8 indices (xr4 -> xr7): each index is moved
    // to a GPR, one byte is loaded from the table, and the byte is inserted
    // back as a word.
1811*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr4,       0
1812*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1813*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr7,      t7,        0
1814*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr4,       1
1815*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1816*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr7,      t7,        1
1817*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr4,       2
1818*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1819*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr7,      t7,        2
1820*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr4,       3
1821*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1822*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr7,      t7,        3
1823*c0909341SAndroid Build Coastguard Worker
1824*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr4,       4
1825*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1826*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr7,      t7,        4
1827*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr4,       5
1828*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1829*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr7,      t7,        5
1830*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr4,       6
1831*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1832*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr7,      t7,        6
1833*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr4,       7
1834*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1835*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr7,      t7,        7     // x
1836*c0909341SAndroid Build Coastguard Worker
    // Same lookup for the second 8 indices (xr5 -> xr8).
1837*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr5,       0
1838*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1839*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr8,      t7,        0
1840*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr5,       1
1841*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1842*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr8,      t7,        1
1843*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr5,       2
1844*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1845*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr8,      t7,        2
1846*c0909341SAndroid Build Coastguard Worker    vpickve2gr.w   t6,       vr5,       3
1847*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1848*c0909341SAndroid Build Coastguard Worker    vinsgr2vr.w    vr8,      t7,        3
1849*c0909341SAndroid Build Coastguard Worker
1850*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr5,       4
1851*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1852*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr8,      t7,        4
1853*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr5,       5
1854*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1855*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr8,      t7,        5
1856*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr5,       6
1857*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1858*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr8,      t7,        6
1859*c0909341SAndroid Build Coastguard Worker    xvpickve2gr.w  t6,       xr5,       7
1860*c0909341SAndroid Build Coastguard Worker    ldx.bu         t7,       t8,        t6
1861*c0909341SAndroid Build Coastguard Worker    xvinsgr2vr.w   xr8,      t7,        7     // x
1862*c0909341SAndroid Build Coastguard Worker
    // New AA = (x * b * 455) rounded >> 12; new BB = 256 - x, packed back to
    // halfwords (xvpickev.h followed by a 64-bit element reorder).
1863*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr9,      xr7,       xr9   // x * BB[i]
1864*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr10,     xr8,       xr10
1865*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr9,      xr9,       xr20  // x * BB[i] * sgr_one_by_x
1866*c0909341SAndroid Build Coastguard Worker    xvmul.w        xr10,     xr10,      xr20
1867*c0909341SAndroid Build Coastguard Worker    xvsrlri.w      xr9,      xr9,       12
1868*c0909341SAndroid Build Coastguard Worker    xvsrlri.w      xr10,     xr10,      12
1869*c0909341SAndroid Build Coastguard Worker    xvsub.w        xr7,      xr21,      xr7
1870*c0909341SAndroid Build Coastguard Worker    xvsub.w        xr8,      xr21,      xr8
1871*c0909341SAndroid Build Coastguard Worker    xvpickev.h     xr12,     xr8,       xr7
1872*c0909341SAndroid Build Coastguard Worker    xvpermi.d      xr11,     xr12,      0b11011000
1873*c0909341SAndroid Build Coastguard Worker
    // Write both results back in place and advance by 16 elements.
1874*c0909341SAndroid Build Coastguard Worker    xvst           xr9,      t0,        0
1875*c0909341SAndroid Build Coastguard Worker    xvst           xr10,     t0,        32
1876*c0909341SAndroid Build Coastguard Worker    xvst           xr11,     t1,        0
1877*c0909341SAndroid Build Coastguard Worker    addi.d         t0,       t0,        64
1878*c0909341SAndroid Build Coastguard Worker    addi.d         t1,       t1,        32
1879*c0909341SAndroid Build Coastguard Worker    blt            zero,     t2,        .LBS3SGF_H_W_LASX
1880*c0909341SAndroid Build Coastguard Worker
    // Next row of both arrays; repeat until the row counter reaches zero.
1881*c0909341SAndroid Build Coastguard Worker    addi.d         a0,       a0,        REST_UNIT_STRIDE<<2
1882*c0909341SAndroid Build Coastguard Worker    addi.d         a1,       a1,        REST_UNIT_STRIDE<<1
1883*c0909341SAndroid Build Coastguard Worker    addi.w         a3,       a3,        -1
1884*c0909341SAndroid Build Coastguard Worker    bnez           a3,       .LBS3SGF_H_H_LASX
1885*c0909341SAndroid Build Coastguard Workerendfunc
1886*c0909341SAndroid Build Coastguard Worker
// boxsum3_h_8bpc_lasx
//
// For every processed row and column, adds three unsigned 8-bit samples taken
// from the same column of three consecutive source rows (byte offsets 0,
// REST_UNIT_STRIDE and 2*REST_UNIT_STRIDE from the running source pointer) and
// also accumulates the sum of their squares.
//
// Register usage, as read from the code below:
//   a0  base of the 32-bit sum-of-squares output ("sumsq"), advanced by
//       REST_UNIT_STRIDE<<2 bytes per row
//   a1  base of the 16-bit sum output ("sum"), advanced by
//       REST_UNIT_STRIDE<<1 bytes per row
//   a2  base of the 8-bit source, advanced by REST_UNIT_STRIDE bytes per row
//   a3  column count; 2 is subtracted on entry (presumably the width w -- TODO confirm)
//   a4  row count; 4 is subtracted on entry (presumably the height h -- TODO confirm)
//
// Both loops test their counter at the bottom, so each body runs at least once.
// The column loop consumes 32 columns per iteration, so the processed width is
// effectively rounded up to a multiple of 32.
// NOTE(review): this assumes the buffers are padded enough for that overrun -- confirm against the caller.
1887*c0909341SAndroid Build Coastguard Workerfunction boxsum3_h_8bpc_lasx
1888*c0909341SAndroid Build Coastguard Worker    addi.d         a2,      a2,      REST_UNIT_STRIDE     // start one source row down
1889*c0909341SAndroid Build Coastguard Worker    li.w           t0,      1                             // x = 1: start at the second column
1890*c0909341SAndroid Build Coastguard Worker    addi.w         a3,      a3,      -2                   // columns to process
1891*c0909341SAndroid Build Coastguard Worker    addi.w         a4,      a4,      -4                   // rows to process
    // ---- per-row loop ----
1892*c0909341SAndroid Build Coastguard Worker.LBS3_H_H_LASX:
1893*c0909341SAndroid Build Coastguard Worker    alsl.d         t1,      t0,      a1,    1     // sum_v    *sum_v = sum + x     (t1 = a1 + (x << 1), 2-byte elements)
1894*c0909341SAndroid Build Coastguard Worker    alsl.d         t2,      t0,      a0,    2     // sumsq_v  *sumsq_v = sumsq + x (t2 = a0 + (x << 2), 4-byte elements)
1895*c0909341SAndroid Build Coastguard Worker    add.d          t3,      t0,      a2           // s = src + x
1896*c0909341SAndroid Build Coastguard Worker    addi.w         t5,      a3,      0            // t5 = remaining columns in this row
1897*c0909341SAndroid Build Coastguard Worker
    // ---- per-32-column loop ----
1898*c0909341SAndroid Build Coastguard Worker.LBS3_H_W_LASX:
    // Load 32 samples from each of three consecutive source rows.
1899*c0909341SAndroid Build Coastguard Worker    xvld           xr0,     t3,      0
1900*c0909341SAndroid Build Coastguard Worker    xvld           xr1,     t3,      REST_UNIT_STRIDE
1901*c0909341SAndroid Build Coastguard Worker    xvld           xr2,     t3,      (REST_UNIT_STRIDE<<1)
1902*c0909341SAndroid Build Coastguard Worker
    // Interleave the bytes of rows 0 and 1 so that each byte pair belongs to one
    // column; the horizontal widening add then yields row0 + row1 per column as
    // unsigned halfwords. Row 2 is zero-extended to halfwords separately.
    // xr3/xr4/xr7 cover the low 8 bytes of each 128-bit lane, xr5/xr6/xr8 the
    // high 8 bytes.
1903*c0909341SAndroid Build Coastguard Worker    xvilvl.b       xr3,     xr1,     xr0
1904*c0909341SAndroid Build Coastguard Worker    xvhaddw.hu.bu  xr4,     xr3,     xr3
1905*c0909341SAndroid Build Coastguard Worker    xvilvh.b       xr5,     xr1,     xr0
1906*c0909341SAndroid Build Coastguard Worker    xvhaddw.hu.bu  xr6,     xr5,     xr5
1907*c0909341SAndroid Build Coastguard Worker    xvsllwil.hu.bu xr7,     xr2,     0            // shift amount 0: pure zero-extension
1908*c0909341SAndroid Build Coastguard Worker    xvexth.hu.bu   xr8,     xr2
1909*c0909341SAndroid Build Coastguard Worker    // sum_v
    // Add row 2 to get the 3-row sum. The "0 2" / "1 3" notes name the
    // 8-column groups held in the (low, high) 128-bit lanes of each register.
1910*c0909341SAndroid Build Coastguard Worker    xvadd.h        xr4,     xr4,     xr7  // 0 2
1911*c0909341SAndroid Build Coastguard Worker    xvadd.h        xr6,     xr6,     xr8  // 1 3
    // Regroup the 128-bit lanes so the two stores write columns in ascending
    // order: xr4 <- groups 0,1 and xr6 <- groups 2,3. xr9 keeps a copy of xr4
    // because the first xvpermi.q overwrites it.
1912*c0909341SAndroid Build Coastguard Worker    xvor.v         xr9,     xr4,     xr4
1913*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr4,     xr6,     0b00000010
1914*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr6,     xr9,     0b00110001
    // Stores land REST_UNIT_STRIDE<<1 bytes past t1, i.e. one per-row pointer
    // step (see the a1 increment below) ahead of the current sum row.
1915*c0909341SAndroid Build Coastguard Worker    xvst           xr4,     t1,      REST_UNIT_STRIDE<<1
1916*c0909341SAndroid Build Coastguard Worker    xvst           xr6,     t1,      (REST_UNIT_STRIDE<<1)+32
1917*c0909341SAndroid Build Coastguard Worker    addi.d         t1,      t1,      64           // 32 columns * 2 bytes
1918*c0909341SAndroid Build Coastguard Worker    // sumsq
    // Square the interleaved bytes: even byte positions (xvmulwev) and odd byte
    // positions (xvmulwod) each give one row's squares as 16-bit products.
1919*c0909341SAndroid Build Coastguard Worker    xvmulwev.h.bu  xr9,     xr3,     xr3
1920*c0909341SAndroid Build Coastguard Worker    xvmulwod.h.bu  xr10,    xr3,     xr3
1921*c0909341SAndroid Build Coastguard Worker    xvmulwev.h.bu  xr11,    xr5,     xr5
1922*c0909341SAndroid Build Coastguard Worker    xvmulwod.h.bu  xr12,    xr5,     xr5
    // Widen to 32 bits while adding the two rows' squares; the even-indexed
    // and odd-indexed columns go to separate registers.
1923*c0909341SAndroid Build Coastguard Worker    xvaddwev.w.hu  xr13,    xr10,    xr9
1924*c0909341SAndroid Build Coastguard Worker    xvaddwod.w.hu  xr14,    xr10,    xr9
1925*c0909341SAndroid Build Coastguard Worker    xvaddwev.w.hu  xr15,    xr12,    xr11
1926*c0909341SAndroid Build Coastguard Worker    xvaddwod.w.hu  xr16,    xr12,    xr11
    // Accumulate row 2 squared (xr7/xr8 still hold the widened row-2 samples).
1927*c0909341SAndroid Build Coastguard Worker    xvmaddwev.w.hu xr13,    xr7,     xr7
1928*c0909341SAndroid Build Coastguard Worker    xvmaddwod.w.hu xr14,    xr7,     xr7
1929*c0909341SAndroid Build Coastguard Worker    xvmaddwev.w.hu xr15,    xr8,     xr8
1930*c0909341SAndroid Build Coastguard Worker    xvmaddwod.w.hu xr16,    xr8,     xr8
    // Re-interleave the even/odd column words back into column order within
    // each 128-bit lane.
1931*c0909341SAndroid Build Coastguard Worker    xvilvl.w       xr9,     xr14,    xr13
1932*c0909341SAndroid Build Coastguard Worker    xvilvh.w       xr10,    xr14,    xr13
1933*c0909341SAndroid Build Coastguard Worker    xvilvl.w       xr11,    xr16,    xr15
1934*c0909341SAndroid Build Coastguard Worker    xvilvh.w       xr12,    xr16,    xr15
    // Regroup the lanes across registers (same scheme as for the sums); xr7 and
    // xr8 are reused as scratch copies of xr9 and xr11 before those are overwritten.
1935*c0909341SAndroid Build Coastguard Worker    xvor.v         xr7,     xr9,     xr9
1936*c0909341SAndroid Build Coastguard Worker    xvor.v         xr8,     xr11,    xr11
1937*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr9,     xr10,    0b00000010
1938*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr10,    xr7,     0b00110001
1939*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr11,    xr12,    0b00000010
1940*c0909341SAndroid Build Coastguard Worker    xvpermi.q      xr12,    xr8,     0b00110001
    // Note the store order xr9, xr11, xr10, xr12: after the regrouping these
    // hold the four consecutive 8-column groups in ascending order.
1941*c0909341SAndroid Build Coastguard Worker    xvst           xr9,     t2,      REST_UNIT_STRIDE<<2
1942*c0909341SAndroid Build Coastguard Worker    xvst           xr11,    t2,      (REST_UNIT_STRIDE<<2)+32
1943*c0909341SAndroid Build Coastguard Worker    xvst           xr10,    t2,      (REST_UNIT_STRIDE<<2)+64
1944*c0909341SAndroid Build Coastguard Worker    xvst           xr12,    t2,      (REST_UNIT_STRIDE<<2)+96
1945*c0909341SAndroid Build Coastguard Worker
1946*c0909341SAndroid Build Coastguard Worker    addi.d         t2,      t2,      128          // 32 columns * 4 bytes
1947*c0909341SAndroid Build Coastguard Worker    addi.w         t5,      t5,      -32          // 32 columns consumed
1948*c0909341SAndroid Build Coastguard Worker    addi.d         t3,      t3,      32           // 32 source bytes
1949*c0909341SAndroid Build Coastguard Worker    blt            zero,    t5,      .LBS3_H_W_LASX   // loop while t5 > 0
1950*c0909341SAndroid Build Coastguard Worker
    // Step all three base pointers to the next row.
1951*c0909341SAndroid Build Coastguard Worker    addi.d         a0,      a0,      REST_UNIT_STRIDE<<2
1952*c0909341SAndroid Build Coastguard Worker    addi.d         a1,      a1,      REST_UNIT_STRIDE<<1
1953*c0909341SAndroid Build Coastguard Worker    addi.d         a2,      a2,      REST_UNIT_STRIDE
1954*c0909341SAndroid Build Coastguard Worker    addi.d         a4,      a4,      -1
1955*c0909341SAndroid Build Coastguard Worker    blt            zero,    a4,      .LBS3_H_H_LASX   // loop while a4 > 0
1956*c0909341SAndroid Build Coastguard Workerendfunc
1957