1*c0909341SAndroid Build Coastguard Worker/* 2*c0909341SAndroid Build Coastguard Worker * Copyright © 2023, VideoLAN and dav1d authors 3*c0909341SAndroid Build Coastguard Worker * Copyright © 2023, Loongson Technology Corporation Limited 4*c0909341SAndroid Build Coastguard Worker * All rights reserved. 5*c0909341SAndroid Build Coastguard Worker * 6*c0909341SAndroid Build Coastguard Worker * Redistribution and use in source and binary forms, with or without 7*c0909341SAndroid Build Coastguard Worker * modification, are permitted provided that the following conditions are met: 8*c0909341SAndroid Build Coastguard Worker * 9*c0909341SAndroid Build Coastguard Worker * 1. Redistributions of source code must retain the above copyright notice, this 10*c0909341SAndroid Build Coastguard Worker * list of conditions and the following disclaimer. 11*c0909341SAndroid Build Coastguard Worker * 12*c0909341SAndroid Build Coastguard Worker * 2. Redistributions in binary form must reproduce the above copyright notice, 13*c0909341SAndroid Build Coastguard Worker * this list of conditions and the following disclaimer in the documentation 14*c0909341SAndroid Build Coastguard Worker * and/or other materials provided with the distribution. 15*c0909341SAndroid Build Coastguard Worker * 16*c0909341SAndroid Build Coastguard Worker * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17*c0909341SAndroid Build Coastguard Worker * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18*c0909341SAndroid Build Coastguard Worker * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19*c0909341SAndroid Build Coastguard Worker * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20*c0909341SAndroid Build Coastguard Worker * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21*c0909341SAndroid Build Coastguard Worker * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22*c0909341SAndroid Build Coastguard Worker * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23*c0909341SAndroid Build Coastguard Worker * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24*c0909341SAndroid Build Coastguard Worker * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25*c0909341SAndroid Build Coastguard Worker * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26*c0909341SAndroid Build Coastguard Worker */ 27*c0909341SAndroid Build Coastguard Worker 28*c0909341SAndroid Build Coastguard Worker#include "src/loongarch/loongson_asm.S" 29*c0909341SAndroid Build Coastguard Worker 30*c0909341SAndroid Build Coastguard Worker#define REST_UNIT_STRIDE (400) 31*c0909341SAndroid Build Coastguard Worker 32*c0909341SAndroid Build Coastguard Worker.macro MADD_HU_BU in0, in1, out0, out1 33*c0909341SAndroid Build Coastguard Worker vsllwil.hu.bu vr12, \in0, 0 34*c0909341SAndroid Build Coastguard Worker vexth.hu.bu vr13, \in0 35*c0909341SAndroid Build Coastguard Worker vmadd.h \out0, vr12, \in1 36*c0909341SAndroid Build Coastguard Worker vmadd.h \out1, vr13, \in1 37*c0909341SAndroid Build Coastguard Worker.endm 38*c0909341SAndroid Build Coastguard Worker 39*c0909341SAndroid Build Coastguard Workerconst wiener_shuf 40*c0909341SAndroid Build Coastguard Worker.byte 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18 41*c0909341SAndroid Build Coastguard Workerendconst 42*c0909341SAndroid Build Coastguard Worker 43*c0909341SAndroid Build Coastguard Worker/* 44*c0909341SAndroid Build Coastguard Workervoid wiener_filter_h_lsx(int32_t *hor_ptr, 45*c0909341SAndroid Build 
Coastguard Worker uint8_t *tmp_ptr, 46*c0909341SAndroid Build Coastguard Worker const int16_t filterh[8], 47*c0909341SAndroid Build Coastguard Worker const int w, const int h) 48*c0909341SAndroid Build Coastguard Worker*/ 49*c0909341SAndroid Build Coastguard Workerfunction wiener_filter_h_8bpc_lsx 50*c0909341SAndroid Build Coastguard Worker addi.d sp, sp, -40 51*c0909341SAndroid Build Coastguard Worker fst.d f24, sp, 0 52*c0909341SAndroid Build Coastguard Worker fst.d f25, sp, 8 53*c0909341SAndroid Build Coastguard Worker fst.d f26, sp, 16 54*c0909341SAndroid Build Coastguard Worker fst.d f27, sp, 24 55*c0909341SAndroid Build Coastguard Worker fst.d f28, sp, 32 56*c0909341SAndroid Build Coastguard Worker li.w t7, 1<<14 // clip_limit 57*c0909341SAndroid Build Coastguard Worker 58*c0909341SAndroid Build Coastguard Worker la.local t1, wiener_shuf 59*c0909341SAndroid Build Coastguard Worker vld vr4, t1, 0 60*c0909341SAndroid Build Coastguard Worker vld vr14, a2, 0 // filter[0][k] 61*c0909341SAndroid Build Coastguard Worker vreplvei.h vr21, vr14, 0 62*c0909341SAndroid Build Coastguard Worker vreplvei.h vr22, vr14, 1 63*c0909341SAndroid Build Coastguard Worker vreplvei.h vr23, vr14, 2 64*c0909341SAndroid Build Coastguard Worker vreplvei.h vr24, vr14, 3 65*c0909341SAndroid Build Coastguard Worker vreplvei.h vr25, vr14, 4 66*c0909341SAndroid Build Coastguard Worker vreplvei.h vr26, vr14, 5 67*c0909341SAndroid Build Coastguard Worker vreplvei.h vr27, vr14, 6 68*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr0, t7 69*c0909341SAndroid Build Coastguard Worker 70*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_H_H: 71*c0909341SAndroid Build Coastguard Worker addi.w a4, a4, -1 // h 72*c0909341SAndroid Build Coastguard Worker addi.w t0, a3, 0 // w 73*c0909341SAndroid Build Coastguard Worker addi.d t1, a1, 0 // tmp_ptr 74*c0909341SAndroid Build Coastguard Worker addi.d t2, a0, 0 // hor_ptr 75*c0909341SAndroid Build Coastguard Worker 76*c0909341SAndroid Build 
Coastguard Worker.WIENER_FILTER_H_W: 77*c0909341SAndroid Build Coastguard Worker addi.w t0, t0, -16 78*c0909341SAndroid Build Coastguard Worker vld vr5, t1, 0 79*c0909341SAndroid Build Coastguard Worker vld vr13, t1, 16 80*c0909341SAndroid Build Coastguard Worker 81*c0909341SAndroid Build Coastguard Worker vsubi.bu vr14, vr4, 2 82*c0909341SAndroid Build Coastguard Worker vsubi.bu vr15, vr4, 1 83*c0909341SAndroid Build Coastguard Worker vshuf.b vr6, vr13, vr5, vr14 // 1 ... 8, 9 ... 16 84*c0909341SAndroid Build Coastguard Worker vshuf.b vr7, vr13, vr5, vr15 // 2 ... 9, 10 ... 17 85*c0909341SAndroid Build Coastguard Worker vshuf.b vr8, vr13, vr5, vr4 // 3 ... 10, 11 ... 18 86*c0909341SAndroid Build Coastguard Worker vaddi.bu vr14, vr4, 1 87*c0909341SAndroid Build Coastguard Worker vaddi.bu vr15, vr4, 2 88*c0909341SAndroid Build Coastguard Worker vshuf.b vr9, vr13, vr5, vr14 // 4 ... 11, 12 ... 19 89*c0909341SAndroid Build Coastguard Worker vshuf.b vr10, vr13, vr5, vr15 // 5 ... 12, 13 ... 20 90*c0909341SAndroid Build Coastguard Worker vaddi.bu vr14, vr4, 3 91*c0909341SAndroid Build Coastguard Worker vshuf.b vr11, vr13, vr5, vr14 // 6 ... 13, 14 ... 
21 92*c0909341SAndroid Build Coastguard Worker 93*c0909341SAndroid Build Coastguard Worker vsllwil.hu.bu vr15, vr8, 0 // 3 4 5 6 7 8 9 10 94*c0909341SAndroid Build Coastguard Worker vexth.hu.bu vr16, vr8 // 11 12 13 14 15 16 17 18 95*c0909341SAndroid Build Coastguard Worker vsllwil.wu.hu vr17, vr15, 7 // 3 4 5 6 96*c0909341SAndroid Build Coastguard Worker vexth.wu.hu vr18, vr15 // 7 8 9 10 97*c0909341SAndroid Build Coastguard Worker vsllwil.wu.hu vr19, vr16, 7 // 11 12 13 14 98*c0909341SAndroid Build Coastguard Worker vexth.wu.hu vr20, vr16 // 15 16 17 18 99*c0909341SAndroid Build Coastguard Worker vslli.w vr18, vr18, 7 100*c0909341SAndroid Build Coastguard Worker vslli.w vr20, vr20, 7 101*c0909341SAndroid Build Coastguard Worker vxor.v vr15, vr15, vr15 102*c0909341SAndroid Build Coastguard Worker vxor.v vr14, vr14, vr14 103*c0909341SAndroid Build Coastguard Worker 104*c0909341SAndroid Build Coastguard Worker MADD_HU_BU vr5, vr21, vr14, vr15 105*c0909341SAndroid Build Coastguard Worker MADD_HU_BU vr6, vr22, vr14, vr15 106*c0909341SAndroid Build Coastguard Worker MADD_HU_BU vr7, vr23, vr14, vr15 107*c0909341SAndroid Build Coastguard Worker MADD_HU_BU vr8, vr24, vr14, vr15 108*c0909341SAndroid Build Coastguard Worker MADD_HU_BU vr9, vr25, vr14, vr15 109*c0909341SAndroid Build Coastguard Worker MADD_HU_BU vr10, vr26, vr14, vr15 110*c0909341SAndroid Build Coastguard Worker MADD_HU_BU vr11, vr27, vr14, vr15 111*c0909341SAndroid Build Coastguard Worker 112*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr5, vr14, 0 // 0 1 2 3 113*c0909341SAndroid Build Coastguard Worker vexth.w.h vr6, vr14 // 4 5 6 7 114*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr7, vr15, 0 // 8 9 10 11 115*c0909341SAndroid Build Coastguard Worker vexth.w.h vr8, vr15 // 12 13 14 15 116*c0909341SAndroid Build Coastguard Worker vadd.w vr17, vr17, vr5 117*c0909341SAndroid Build Coastguard Worker vadd.w vr18, vr18, vr6 118*c0909341SAndroid Build Coastguard Worker vadd.w vr19, vr19, vr7 
119*c0909341SAndroid Build Coastguard Worker vadd.w vr20, vr20, vr8 120*c0909341SAndroid Build Coastguard Worker vadd.w vr17, vr17, vr0 121*c0909341SAndroid Build Coastguard Worker vadd.w vr18, vr18, vr0 122*c0909341SAndroid Build Coastguard Worker vadd.w vr19, vr19, vr0 123*c0909341SAndroid Build Coastguard Worker vadd.w vr20, vr20, vr0 124*c0909341SAndroid Build Coastguard Worker 125*c0909341SAndroid Build Coastguard Worker vsrli.w vr1, vr0, 1 126*c0909341SAndroid Build Coastguard Worker vsubi.wu vr1, vr1, 1 127*c0909341SAndroid Build Coastguard Worker vxor.v vr3, vr3, vr3 128*c0909341SAndroid Build Coastguard Worker vsrari.w vr17, vr17, 3 129*c0909341SAndroid Build Coastguard Worker vsrari.w vr18, vr18, 3 130*c0909341SAndroid Build Coastguard Worker vsrari.w vr19, vr19, 3 131*c0909341SAndroid Build Coastguard Worker vsrari.w vr20, vr20, 3 132*c0909341SAndroid Build Coastguard Worker vclip.w vr17, vr17, vr3, vr1 133*c0909341SAndroid Build Coastguard Worker vclip.w vr18, vr18, vr3, vr1 134*c0909341SAndroid Build Coastguard Worker vclip.w vr19, vr19, vr3, vr1 135*c0909341SAndroid Build Coastguard Worker vclip.w vr20, vr20, vr3, vr1 136*c0909341SAndroid Build Coastguard Worker 137*c0909341SAndroid Build Coastguard Worker vst vr17, t2, 0 138*c0909341SAndroid Build Coastguard Worker vst vr18, t2, 16 139*c0909341SAndroid Build Coastguard Worker vst vr19, t2, 32 140*c0909341SAndroid Build Coastguard Worker vst vr20, t2, 48 141*c0909341SAndroid Build Coastguard Worker addi.d t1, t1, 16 142*c0909341SAndroid Build Coastguard Worker addi.d t2, t2, 64 143*c0909341SAndroid Build Coastguard Worker blt zero, t0, .WIENER_FILTER_H_W 144*c0909341SAndroid Build Coastguard Worker 145*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE 146*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, (REST_UNIT_STRIDE << 2) 147*c0909341SAndroid Build Coastguard Worker bnez a4, .WIENER_FILTER_H_H 148*c0909341SAndroid Build Coastguard Worker 149*c0909341SAndroid Build 
Coastguard Worker fld.d f24, sp, 0 150*c0909341SAndroid Build Coastguard Worker fld.d f25, sp, 8 151*c0909341SAndroid Build Coastguard Worker fld.d f26, sp, 16 152*c0909341SAndroid Build Coastguard Worker fld.d f27, sp, 24 153*c0909341SAndroid Build Coastguard Worker fld.d f28, sp, 32 154*c0909341SAndroid Build Coastguard Worker addi.d sp, sp, 40 155*c0909341SAndroid Build Coastguard Workerendfunc 156*c0909341SAndroid Build Coastguard Worker 157*c0909341SAndroid Build Coastguard Worker.macro APPLY_FILTER in0, in1, in2 158*c0909341SAndroid Build Coastguard Worker alsl.d t7, \in0, \in1, 2 159*c0909341SAndroid Build Coastguard Worker vld vr10, t7, 0 160*c0909341SAndroid Build Coastguard Worker vld vr11, t7, 16 161*c0909341SAndroid Build Coastguard Worker vld vr12, t7, 32 162*c0909341SAndroid Build Coastguard Worker vld vr13, t7, 48 163*c0909341SAndroid Build Coastguard Worker vmadd.w vr14, vr10, \in2 164*c0909341SAndroid Build Coastguard Worker vmadd.w vr15, vr11, \in2 165*c0909341SAndroid Build Coastguard Worker vmadd.w vr16, vr12, \in2 166*c0909341SAndroid Build Coastguard Worker vmadd.w vr17, vr13, \in2 167*c0909341SAndroid Build Coastguard Worker.endm 168*c0909341SAndroid Build Coastguard Worker 169*c0909341SAndroid Build Coastguard Worker.macro wiener_filter_v_8bpc_core_lsx 170*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr14, t6 171*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr15, t6 172*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr16, t6 173*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr17, t6 174*c0909341SAndroid Build Coastguard Worker 175*c0909341SAndroid Build Coastguard Worker addi.w t7, t2, 0 // j + index k 176*c0909341SAndroid Build Coastguard Worker mul.w t7, t7, t8 // (j + index) * REST_UNIT_STRIDE 177*c0909341SAndroid Build Coastguard Worker add.w t7, t7, t4 // (j + index) * REST_UNIT_STRIDE + i 178*c0909341SAndroid Build Coastguard Worker 179*c0909341SAndroid Build Coastguard Worker APPLY_FILTER t7, a2, vr2 
180*c0909341SAndroid Build Coastguard Worker APPLY_FILTER t8, t7, vr3 181*c0909341SAndroid Build Coastguard Worker APPLY_FILTER t8, t7, vr4 182*c0909341SAndroid Build Coastguard Worker APPLY_FILTER t8, t7, vr5 183*c0909341SAndroid Build Coastguard Worker APPLY_FILTER t8, t7, vr6 184*c0909341SAndroid Build Coastguard Worker APPLY_FILTER t8, t7, vr7 185*c0909341SAndroid Build Coastguard Worker APPLY_FILTER t8, t7, vr8 186*c0909341SAndroid Build Coastguard Worker vssrarni.hu.w vr15, vr14, 11 187*c0909341SAndroid Build Coastguard Worker vssrarni.hu.w vr17, vr16, 11 188*c0909341SAndroid Build Coastguard Worker vssrlni.bu.h vr17, vr15, 0 189*c0909341SAndroid Build Coastguard Worker.endm 190*c0909341SAndroid Build Coastguard Worker 191*c0909341SAndroid Build Coastguard Worker/* 192*c0909341SAndroid Build Coastguard Workervoid wiener_filter_v_lsx(uint8_t *p, 193*c0909341SAndroid Build Coastguard Worker const ptrdiff_t p_stride, 194*c0909341SAndroid Build Coastguard Worker const int32_t *hor, 195*c0909341SAndroid Build Coastguard Worker const int16_t filterv[8], 196*c0909341SAndroid Build Coastguard Worker const int w, const int h) 197*c0909341SAndroid Build Coastguard Worker*/ 198*c0909341SAndroid Build Coastguard Workerfunction wiener_filter_v_8bpc_lsx 199*c0909341SAndroid Build Coastguard Worker li.w t6, -(1 << 18) 200*c0909341SAndroid Build Coastguard Worker 201*c0909341SAndroid Build Coastguard Worker li.w t8, REST_UNIT_STRIDE 202*c0909341SAndroid Build Coastguard Worker ld.h t0, a3, 0 203*c0909341SAndroid Build Coastguard Worker ld.h t1, a3, 2 204*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr2, t0 205*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr3, t1 206*c0909341SAndroid Build Coastguard Worker ld.h t0, a3, 4 207*c0909341SAndroid Build Coastguard Worker ld.h t1, a3, 6 208*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr4, t0 209*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr5, t1 210*c0909341SAndroid Build Coastguard Worker 
ld.h t0, a3, 8 211*c0909341SAndroid Build Coastguard Worker ld.h t1, a3, 10 212*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr6, t0 213*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr7, t1 214*c0909341SAndroid Build Coastguard Worker ld.h t0, a3, 12 215*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr8, t0 216*c0909341SAndroid Build Coastguard Worker 217*c0909341SAndroid Build Coastguard Worker andi t1, a4, 0xf 218*c0909341SAndroid Build Coastguard Worker sub.w t0, a4, t1 // w-w%16 219*c0909341SAndroid Build Coastguard Worker or t2, zero, zero // j 220*c0909341SAndroid Build Coastguard Worker or t4, zero, zero 221*c0909341SAndroid Build Coastguard Worker beqz t0, .WIENER_FILTER_V_W_LT16 222*c0909341SAndroid Build Coastguard Worker 223*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_H: 224*c0909341SAndroid Build Coastguard Worker andi t1, a4, 0xf 225*c0909341SAndroid Build Coastguard Worker add.d t3, zero, a0 // p 226*c0909341SAndroid Build Coastguard Worker or t4, zero, zero // i 227*c0909341SAndroid Build Coastguard Worker 228*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W: 229*c0909341SAndroid Build Coastguard Worker 230*c0909341SAndroid Build Coastguard Worker wiener_filter_v_8bpc_core_lsx 231*c0909341SAndroid Build Coastguard Worker 232*c0909341SAndroid Build Coastguard Worker mul.w t5, t2, a1 // j * stride 233*c0909341SAndroid Build Coastguard Worker add.w t5, t5, t4 // j * stride + i 234*c0909341SAndroid Build Coastguard Worker add.d t3, a0, t5 235*c0909341SAndroid Build Coastguard Worker addi.w t4, t4, 16 236*c0909341SAndroid Build Coastguard Worker vst vr17, t3, 0 237*c0909341SAndroid Build Coastguard Worker bne t0, t4, .WIENER_FILTER_V_W 238*c0909341SAndroid Build Coastguard Worker 239*c0909341SAndroid Build Coastguard Worker beqz t1, .WIENER_FILTER_V_W_EQ16 240*c0909341SAndroid Build Coastguard Worker 241*c0909341SAndroid Build Coastguard Worker wiener_filter_v_8bpc_core_lsx 242*c0909341SAndroid Build 
Coastguard Worker 243*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, 16 244*c0909341SAndroid Build Coastguard Worker andi t1, a4, 0xf 245*c0909341SAndroid Build Coastguard Worker 246*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_ST_REM: 247*c0909341SAndroid Build Coastguard Worker vstelm.b vr17, t3, 0, 0 248*c0909341SAndroid Build Coastguard Worker vbsrl.v vr17, vr17, 1 249*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, 1 250*c0909341SAndroid Build Coastguard Worker addi.w t1, t1, -1 251*c0909341SAndroid Build Coastguard Worker bnez t1, .WIENER_FILTER_V_ST_REM 252*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W_EQ16: 253*c0909341SAndroid Build Coastguard Worker addi.w t2, t2, 1 254*c0909341SAndroid Build Coastguard Worker blt t2, a5, .WIENER_FILTER_V_H 255*c0909341SAndroid Build Coastguard Worker b .WIENER_FILTER_V_END 256*c0909341SAndroid Build Coastguard Worker 257*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_W_LT16: 258*c0909341SAndroid Build Coastguard Worker andi t1, a4, 0xf 259*c0909341SAndroid Build Coastguard Worker add.d t3, zero, a0 260*c0909341SAndroid Build Coastguard Worker 261*c0909341SAndroid Build Coastguard Worker wiener_filter_v_8bpc_core_lsx 262*c0909341SAndroid Build Coastguard Worker 263*c0909341SAndroid Build Coastguard Worker mul.w t5, t2, a1 // j * stride 264*c0909341SAndroid Build Coastguard Worker add.d t3, a0, t5 265*c0909341SAndroid Build Coastguard Worker 266*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_ST_REM_1: 267*c0909341SAndroid Build Coastguard Worker vstelm.b vr17, t3, 0, 0 268*c0909341SAndroid Build Coastguard Worker vbsrl.v vr17, vr17, 1 269*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, 1 270*c0909341SAndroid Build Coastguard Worker addi.w t1, t1, -1 271*c0909341SAndroid Build Coastguard Worker bnez t1, .WIENER_FILTER_V_ST_REM_1 272*c0909341SAndroid Build Coastguard Worker 273*c0909341SAndroid Build Coastguard Worker addi.w t2, t2, 1 274*c0909341SAndroid 
Build Coastguard Worker blt t2, a5, .WIENER_FILTER_V_W_LT16 275*c0909341SAndroid Build Coastguard Worker 276*c0909341SAndroid Build Coastguard Worker.WIENER_FILTER_V_END: 277*c0909341SAndroid Build Coastguard Workerendfunc 278*c0909341SAndroid Build Coastguard Worker 279*c0909341SAndroid Build Coastguard Worker/* 280*c0909341SAndroid Build Coastguard Workervoid boxsum3_h(int32_t *sumsq, coef *sum, const pixel *src, 281*c0909341SAndroid Build Coastguard Worker const int w, const int h) 282*c0909341SAndroid Build Coastguard Worker*/ 283*c0909341SAndroid Build Coastguard Workerfunction boxsum3_h_8bpc_lsx 284*c0909341SAndroid Build Coastguard Worker addi.d a2, a2, REST_UNIT_STRIDE 285*c0909341SAndroid Build Coastguard Worker li.w t0, 1 286*c0909341SAndroid Build Coastguard Worker addi.w a3, a3, -2 287*c0909341SAndroid Build Coastguard Worker addi.w a4, a4, -4 288*c0909341SAndroid Build Coastguard Worker 289*c0909341SAndroid Build Coastguard Worker.LBS3_H_H: 290*c0909341SAndroid Build Coastguard Worker alsl.d t1, t0, a1, 1 // sum_v *sum_v = sum + x 291*c0909341SAndroid Build Coastguard Worker alsl.d t2, t0, a0, 2 // sumsq_v *sumsq_v = sumsq + x 292*c0909341SAndroid Build Coastguard Worker add.d t3, t0, a2 // s 293*c0909341SAndroid Build Coastguard Worker addi.w t5, a3, 0 294*c0909341SAndroid Build Coastguard Worker.LBS3_H_W: 295*c0909341SAndroid Build Coastguard Worker vld vr0, t3, 0 296*c0909341SAndroid Build Coastguard Worker vld vr1, t3, REST_UNIT_STRIDE 297*c0909341SAndroid Build Coastguard Worker vld vr2, t3, (REST_UNIT_STRIDE<<1) 298*c0909341SAndroid Build Coastguard Worker 299*c0909341SAndroid Build Coastguard Worker vilvl.b vr3, vr1, vr0 300*c0909341SAndroid Build Coastguard Worker vhaddw.hu.bu vr4, vr3, vr3 301*c0909341SAndroid Build Coastguard Worker vilvh.b vr5, vr1, vr0 302*c0909341SAndroid Build Coastguard Worker vhaddw.hu.bu vr6, vr5, vr5 303*c0909341SAndroid Build Coastguard Worker vsllwil.hu.bu vr7, vr2, 0 304*c0909341SAndroid Build Coastguard Worker 
vexth.hu.bu vr8, vr2 305*c0909341SAndroid Build Coastguard Worker // sum_v 306*c0909341SAndroid Build Coastguard Worker vadd.h vr4, vr4, vr7 307*c0909341SAndroid Build Coastguard Worker vadd.h vr6, vr6, vr8 308*c0909341SAndroid Build Coastguard Worker vst vr4, t1, REST_UNIT_STRIDE<<1 309*c0909341SAndroid Build Coastguard Worker vst vr6, t1, (REST_UNIT_STRIDE<<1)+16 310*c0909341SAndroid Build Coastguard Worker addi.d t1, t1, 32 311*c0909341SAndroid Build Coastguard Worker // sumsq 312*c0909341SAndroid Build Coastguard Worker vmulwev.h.bu vr9, vr3, vr3 313*c0909341SAndroid Build Coastguard Worker vmulwod.h.bu vr10, vr3, vr3 314*c0909341SAndroid Build Coastguard Worker vmulwev.h.bu vr11, vr5, vr5 315*c0909341SAndroid Build Coastguard Worker vmulwod.h.bu vr12, vr5, vr5 316*c0909341SAndroid Build Coastguard Worker vaddwev.w.hu vr13, vr10, vr9 317*c0909341SAndroid Build Coastguard Worker vaddwod.w.hu vr14, vr10, vr9 318*c0909341SAndroid Build Coastguard Worker vaddwev.w.hu vr15, vr12, vr11 319*c0909341SAndroid Build Coastguard Worker vaddwod.w.hu vr16, vr12, vr11 320*c0909341SAndroid Build Coastguard Worker vmaddwev.w.hu vr13, vr7, vr7 321*c0909341SAndroid Build Coastguard Worker vmaddwod.w.hu vr14, vr7, vr7 322*c0909341SAndroid Build Coastguard Worker vmaddwev.w.hu vr15, vr8, vr8 323*c0909341SAndroid Build Coastguard Worker vmaddwod.w.hu vr16, vr8, vr8 324*c0909341SAndroid Build Coastguard Worker vilvl.w vr9, vr14, vr13 325*c0909341SAndroid Build Coastguard Worker vilvh.w vr10, vr14, vr13 326*c0909341SAndroid Build Coastguard Worker vilvl.w vr11, vr16, vr15 327*c0909341SAndroid Build Coastguard Worker vilvh.w vr12, vr16, vr15 328*c0909341SAndroid Build Coastguard Worker vst vr9, t2, REST_UNIT_STRIDE<<2 329*c0909341SAndroid Build Coastguard Worker vst vr10, t2, (REST_UNIT_STRIDE<<2)+16 330*c0909341SAndroid Build Coastguard Worker vst vr11, t2, (REST_UNIT_STRIDE<<2)+32 331*c0909341SAndroid Build Coastguard Worker vst vr12, t2, (REST_UNIT_STRIDE<<2)+48 332*c0909341SAndroid 
Build Coastguard Worker 333*c0909341SAndroid Build Coastguard Worker addi.d t2, t2, 64 334*c0909341SAndroid Build Coastguard Worker addi.w t5, t5, -16 335*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, 16 336*c0909341SAndroid Build Coastguard Worker blt zero, t5, .LBS3_H_W 337*c0909341SAndroid Build Coastguard Worker 338*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, REST_UNIT_STRIDE<<2 339*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE<<1 340*c0909341SAndroid Build Coastguard Worker addi.d a2, a2, REST_UNIT_STRIDE 341*c0909341SAndroid Build Coastguard Worker addi.d a4, a4, -1 342*c0909341SAndroid Build Coastguard Worker blt zero, a4, .LBS3_H_H 343*c0909341SAndroid Build Coastguard Workerendfunc 344*c0909341SAndroid Build Coastguard Worker 345*c0909341SAndroid Build Coastguard Worker/* 346*c0909341SAndroid Build Coastguard Workervoid boxsum3_v(int32_t *sumsq, coef *sum, 347*c0909341SAndroid Build Coastguard Worker const int w, const int h) 348*c0909341SAndroid Build Coastguard Worker*/ 349*c0909341SAndroid Build Coastguard Workerfunction boxsum3_v_8bpc_lsx 350*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, (REST_UNIT_STRIDE<<2) 351*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, (REST_UNIT_STRIDE<<1) 352*c0909341SAndroid Build Coastguard Worker addi.w a3, a3, -4 353*c0909341SAndroid Build Coastguard Worker addi.w a2, a2, -4 354*c0909341SAndroid Build Coastguard Worker 355*c0909341SAndroid Build Coastguard Worker.LBS3_V_H: 356*c0909341SAndroid Build Coastguard Worker sub.w t3, a2, zero 357*c0909341SAndroid Build Coastguard Worker addi.d t0, a0, 4 358*c0909341SAndroid Build Coastguard Worker addi.d t1, a1, 2 359*c0909341SAndroid Build Coastguard Worker addi.d t5, a0, 8 360*c0909341SAndroid Build Coastguard Worker addi.d t6, a1, 4 361*c0909341SAndroid Build Coastguard Worker 362*c0909341SAndroid Build Coastguard Worker vld vr0, t1, 0 // a 0 1 2 3 4 5 6 7 363*c0909341SAndroid Build Coastguard Worker vld vr1, 
t1, 2 // b 1 2 3 4 5 6 7 8 364*c0909341SAndroid Build Coastguard Worker vld vr2, t1, 4 // c 2 3 4 5 6 7 8 9 365*c0909341SAndroid Build Coastguard Worker vld vr3, t0, 0 // a2 0 1 2 3 366*c0909341SAndroid Build Coastguard Worker vld vr4, t0, 4 // b2 1 2 3 4 367*c0909341SAndroid Build Coastguard Worker vld vr5, t0, 8 // c2 2 3 4 5 368*c0909341SAndroid Build Coastguard Worker vld vr6, t0, 16 // 3 4 5 6 369*c0909341SAndroid Build Coastguard Worker vld vr7, t0, 20 // 4 5 6 7 370*c0909341SAndroid Build Coastguard Worker vld vr8, t0, 24 // 5 6 7 8 371*c0909341SAndroid Build Coastguard Worker vadd.h vr9, vr0, vr1 372*c0909341SAndroid Build Coastguard Worker vadd.w vr10, vr3, vr4 373*c0909341SAndroid Build Coastguard Worker vadd.w vr11, vr6, vr7 374*c0909341SAndroid Build Coastguard Worker vadd.h vr9, vr9, vr2 375*c0909341SAndroid Build Coastguard Worker vadd.w vr10, vr10, vr5 376*c0909341SAndroid Build Coastguard Worker vadd.w vr11, vr11, vr8 377*c0909341SAndroid Build Coastguard Worker vpickve2gr.h t7, vr2, 6 378*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t8, vr8, 2 379*c0909341SAndroid Build Coastguard Worker vst vr9, t6, 0 380*c0909341SAndroid Build Coastguard Worker vst vr10, t5, 0 381*c0909341SAndroid Build Coastguard Worker vst vr11, t5, 16 382*c0909341SAndroid Build Coastguard Worker 383*c0909341SAndroid Build Coastguard Worker addi.d t1, t1, 16 384*c0909341SAndroid Build Coastguard Worker addi.d t0, t0, 32 385*c0909341SAndroid Build Coastguard Worker addi.d t5, t5, 32 386*c0909341SAndroid Build Coastguard Worker addi.d t6, t6, 16 387*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, -8 388*c0909341SAndroid Build Coastguard Worker ble t3, zero, .LBS3_V_H0 389*c0909341SAndroid Build Coastguard Worker 390*c0909341SAndroid Build Coastguard Worker.LBS3_V_W8: 391*c0909341SAndroid Build Coastguard Worker vld vr0, t1, 0 // a 0 1 2 3 4 5 6 7 392*c0909341SAndroid Build Coastguard Worker vld vr1, t1, 2 // b 1 2 3 4 5 6 7 8 393*c0909341SAndroid Build Coastguard 
Worker vld vr2, t1, 4 // c 2 3 4 5 6 7 8 9 394*c0909341SAndroid Build Coastguard Worker vld vr3, t0, 0 // a2 0 1 2 3 395*c0909341SAndroid Build Coastguard Worker vld vr4, t0, 4 // b2 1 2 3 4 396*c0909341SAndroid Build Coastguard Worker vld vr5, t0, 8 // c2 2 3 4 5 397*c0909341SAndroid Build Coastguard Worker vld vr6, t0, 16 // 3 4 5 6 398*c0909341SAndroid Build Coastguard Worker vld vr7, t0, 20 // 4 5 6 7 399*c0909341SAndroid Build Coastguard Worker vld vr8, t0, 24 // 5 6 7 8 400*c0909341SAndroid Build Coastguard Worker vinsgr2vr.h vr0, t7, 0 401*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr3, t8, 0 402*c0909341SAndroid Build Coastguard Worker vpickve2gr.h t7, vr2, 6 403*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t8, vr8, 2 404*c0909341SAndroid Build Coastguard Worker vadd.h vr9, vr0, vr1 405*c0909341SAndroid Build Coastguard Worker vadd.w vr10, vr3, vr4 406*c0909341SAndroid Build Coastguard Worker vadd.w vr11, vr6, vr7 407*c0909341SAndroid Build Coastguard Worker vadd.h vr9, vr9, vr2 408*c0909341SAndroid Build Coastguard Worker vadd.w vr10, vr10, vr5 409*c0909341SAndroid Build Coastguard Worker vadd.w vr11, vr11, vr8 410*c0909341SAndroid Build Coastguard Worker vst vr9, t6, 0 411*c0909341SAndroid Build Coastguard Worker vst vr10, t5, 0 412*c0909341SAndroid Build Coastguard Worker vst vr11, t5, 16 413*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, -8 414*c0909341SAndroid Build Coastguard Worker addi.d t1, t1, 16 415*c0909341SAndroid Build Coastguard Worker addi.d t0, t0, 32 416*c0909341SAndroid Build Coastguard Worker addi.d t5, t5, 32 417*c0909341SAndroid Build Coastguard Worker addi.d t6, t6, 16 418*c0909341SAndroid Build Coastguard Worker blt zero, t3, .LBS3_V_W8 419*c0909341SAndroid Build Coastguard Worker 420*c0909341SAndroid Build Coastguard Worker.LBS3_V_H0: 421*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE<<1 422*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, REST_UNIT_STRIDE<<2 
423*c0909341SAndroid Build Coastguard Worker addi.w a3, a3, -1 424*c0909341SAndroid Build Coastguard Worker bnez a3, .LBS3_V_H 425*c0909341SAndroid Build Coastguard Workerendfunc 426*c0909341SAndroid Build Coastguard Worker 427*c0909341SAndroid Build Coastguard Worker/* 428*c0909341SAndroid Build Coastguard Workerboxsum3_selfguided_filter(int32_t *sumsq, coef *sum, 429*c0909341SAndroid Build Coastguard Worker const int w, const int h, 430*c0909341SAndroid Build Coastguard Worker const unsigned s) 431*c0909341SAndroid Build Coastguard Worker*/ 432*c0909341SAndroid Build Coastguard Workerfunction boxsum3_sgf_h_8bpc_lsx 433*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, REST_UNIT_STRIDE<<2 434*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, 12 // AA 435*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE<<1 436*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, 6 // BB 437*c0909341SAndroid Build Coastguard Worker la.local t8, dav1d_sgr_x_by_x 438*c0909341SAndroid Build Coastguard Worker li.w t6, 455 439*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr20, t6 440*c0909341SAndroid Build Coastguard Worker li.w t6, 255 441*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr22, t6 442*c0909341SAndroid Build Coastguard Worker vaddi.wu vr21, vr22, 1 // 256 443*c0909341SAndroid Build Coastguard Worker vreplgr2vr.w vr6, a4 444*c0909341SAndroid Build Coastguard Worker vldi vr19, 0x809 445*c0909341SAndroid Build Coastguard Worker addi.w a2, a2, 2 // w + 2 446*c0909341SAndroid Build Coastguard Worker addi.w a3, a3, 2 // h + 2 447*c0909341SAndroid Build Coastguard Worker 448*c0909341SAndroid Build Coastguard Worker.LBS3SGF_H_H: 449*c0909341SAndroid Build Coastguard Worker addi.w t2, a2, 0 450*c0909341SAndroid Build Coastguard Worker addi.d t0, a0, -4 451*c0909341SAndroid Build Coastguard Worker addi.d t1, a1, -2 452*c0909341SAndroid Build Coastguard Worker 453*c0909341SAndroid Build Coastguard Worker.LBS3SGF_H_W: 
454*c0909341SAndroid Build Coastguard Worker addi.w t2, t2, -8 455*c0909341SAndroid Build Coastguard Worker vld vr0, t0, 0 // AA[i] 456*c0909341SAndroid Build Coastguard Worker vld vr1, t0, 16 457*c0909341SAndroid Build Coastguard Worker vld vr2, t1, 0 // BB[i] 458*c0909341SAndroid Build Coastguard Worker 459*c0909341SAndroid Build Coastguard Worker vmul.w vr4, vr0, vr19 // a * n 460*c0909341SAndroid Build Coastguard Worker vmul.w vr5, vr1, vr19 // a * n 461*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr9, vr2, 0 462*c0909341SAndroid Build Coastguard Worker vexth.w.h vr10, vr2 463*c0909341SAndroid Build Coastguard Worker vmsub.w vr4, vr9, vr9 // p 464*c0909341SAndroid Build Coastguard Worker vmsub.w vr5, vr10, vr10 // p 465*c0909341SAndroid Build Coastguard Worker vmaxi.w vr4, vr4, 0 466*c0909341SAndroid Build Coastguard Worker vmaxi.w vr5, vr5, 0 // p 467*c0909341SAndroid Build Coastguard Worker vmul.w vr4, vr4, vr6 // p * s 468*c0909341SAndroid Build Coastguard Worker vmul.w vr5, vr5, vr6 // p * s 469*c0909341SAndroid Build Coastguard Worker vsrlri.w vr4, vr4, 20 470*c0909341SAndroid Build Coastguard Worker vsrlri.w vr5, vr5, 20 // z 471*c0909341SAndroid Build Coastguard Worker vmin.w vr4, vr4, vr22 472*c0909341SAndroid Build Coastguard Worker vmin.w vr5, vr5, vr22 473*c0909341SAndroid Build Coastguard Worker 474*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t6, vr4, 0 475*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 476*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr7, t7, 0 477*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t6, vr4, 1 478*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 479*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr7, t7, 1 480*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t6, vr4, 2 481*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 482*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr7, t7, 2 483*c0909341SAndroid Build Coastguard Worker vpickve2gr.w 
t6, vr4, 3 484*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 485*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr7, t7, 3 486*c0909341SAndroid Build Coastguard Worker 487*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t6, vr5, 0 488*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 489*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr8, t7, 0 490*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t6, vr5, 1 491*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 492*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr8, t7, 1 493*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t6, vr5, 2 494*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 495*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr8, t7, 2 496*c0909341SAndroid Build Coastguard Worker vpickve2gr.w t6, vr5, 3 497*c0909341SAndroid Build Coastguard Worker ldx.bu t7, t8, t6 498*c0909341SAndroid Build Coastguard Worker vinsgr2vr.w vr8, t7, 3 // x 499*c0909341SAndroid Build Coastguard Worker 500*c0909341SAndroid Build Coastguard Worker vmul.w vr9, vr7, vr9 // x * BB[i] 501*c0909341SAndroid Build Coastguard Worker vmul.w vr10, vr8, vr10 502*c0909341SAndroid Build Coastguard Worker vmul.w vr9, vr9, vr20 // x * BB[i] * sgr_one_by_x 503*c0909341SAndroid Build Coastguard Worker vmul.w vr10, vr10, vr20 504*c0909341SAndroid Build Coastguard Worker vsrlri.w vr9, vr9, 12 505*c0909341SAndroid Build Coastguard Worker vsrlri.w vr10, vr10, 12 506*c0909341SAndroid Build Coastguard Worker vsub.w vr7, vr21, vr7 507*c0909341SAndroid Build Coastguard Worker vsub.w vr8, vr21, vr8 508*c0909341SAndroid Build Coastguard Worker vpickev.h vr8, vr8, vr7 509*c0909341SAndroid Build Coastguard Worker 510*c0909341SAndroid Build Coastguard Worker vst vr9, t0, 0 511*c0909341SAndroid Build Coastguard Worker vst vr10, t0, 16 512*c0909341SAndroid Build Coastguard Worker vst vr8, t1, 0 513*c0909341SAndroid Build Coastguard Worker addi.d t0, t0, 32 514*c0909341SAndroid 
Build Coastguard Worker addi.d t1, t1, 16 515*c0909341SAndroid Build Coastguard Worker blt zero, t2, .LBS3SGF_H_W 516*c0909341SAndroid Build Coastguard Worker 517*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, REST_UNIT_STRIDE<<2 518*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE<<1 519*c0909341SAndroid Build Coastguard Worker addi.w a3, a3, -1 520*c0909341SAndroid Build Coastguard Worker bnez a3, .LBS3SGF_H_H 521*c0909341SAndroid Build Coastguard Workerendfunc 522*c0909341SAndroid Build Coastguard Worker
/*
 * boxsum3_selfguided_filter(coef *dst, pixel *src,
 *                           int32_t *sumsq, coef *sum,
 *                           const int w, const int h)
 *
 * Register use, as read from the code below:
 *   a0 = dst, a1 = src, a2 = sumsq, a3 = sum, a4 = w, a5 = h
 *
 * For every output position a 3x3 weighted neighbourhood sum is formed twice:
 * once over sumsq (32-bit entries) and once over sum (16-bit entries).  The
 * centre and its four edge neighbours are weighted 4, the four diagonal
 * neighbours are weighted 3.  The value written to dst is
 *
 *     (weighted_sumsq + weighted_sum * src[i]) >> 9
 *
 * with rounding, narrowed to 16 bit with saturation.  Eight outputs are
 * produced per inner iteration, so the width is effectively rounded up to a
 * multiple of 8.  Both loops are bottom-tested, i.e. at least one row and one
 * group of eight columns are always processed.
 */
function boxsum3_sgf_v_8bpc_lsx
    // Move every input pointer to the first filtered position.
    addi.d          a1,     a1,     (3*REST_UNIT_STRIDE+3)      // src: 3 rows + 3 pixels
    // sumsq: 2 rows + 3 int32 entries; the total does not fit the signed
    // 12-bit immediate of addi.d, hence two additions.
    addi.d          a2,     a2,     REST_UNIT_STRIDE<<2
    addi.d          a2,     a2,     (REST_UNIT_STRIDE<<2)+12
    // sum: 2 rows + 3 16-bit entries
    addi.d          a3,     a3,     REST_UNIT_STRIDE<<2
    addi.d          a3,     a3,     6

.LBS3SGF_V_H:
    // sumsq (int32): row above / current row / row below
    addi.d          t0,     a2,     -(REST_UNIT_STRIDE<<2)
    addi.d          t1,     a2,     0
    addi.d          t2,     a2,     REST_UNIT_STRIDE<<2
    // sum (16-bit): row above / current row / row below
    addi.d          t3,     a3,     -(REST_UNIT_STRIDE<<1)
    addi.d          t4,     a3,     0
    addi.d          t5,     a3,     REST_UNIT_STRIDE<<1
    // per-row cursors
    addi.d          t6,     a1,     0                           // src
    addi.w          t7,     a4,     0                           // columns left
    addi.d          t8,     a0,     0                           // dst

.LBS3SGF_V_W:
    // ---- sumsq: edge neighbours and centre, columns 0-3 / 4-7 ----
    vld             vr0,    t0,     0       // P[i - REST_UNIT_STRIDE]
    vld             vr1,    t0,     16
    vld             vr2,    t1,     -4      // P[i - 1]     -1 0 1 2
    vld             vr3,    t1,     12      //               3 4 5 6
    vld             vr4,    t2,     0       // P[i + REST_UNIT_STRIDE]
    vld             vr5,    t2,     16
    vld             vr6,    t1,     0       // P[i]          0 1 2 3
    vld             vr7,    t1,     16      //               4 5 6 7
    vld             vr8,    t1,     4       // P[i + 1]      1 2 3 4
    vld             vr9,    t1,     20      //               5 6 7 8

    // ---- sumsq: diagonal neighbours ----
    vld             vr10,   t0,     -4      // P[i - 1 - REST_UNIT_STRIDE]
    vld             vr11,   t0,     12
    vld             vr12,   t2,     -4      // P[i - 1 + REST_UNIT_STRIDE]
    vld             vr13,   t2,     12
    vld             vr14,   t0,     4       // P[i + 1 - REST_UNIT_STRIDE]
    vld             vr15,   t0,     20
    vld             vr16,   t2,     4       // P[i + 1 + REST_UNIT_STRIDE]
    vld             vr17,   t2,     20

    // 4 * (up + left + centre + right + down), columns 0-3
    vadd.w          vr0,    vr2,    vr0
    vadd.w          vr4,    vr6,    vr4
    vadd.w          vr0,    vr0,    vr8
    vadd.w          vr20,   vr0,    vr4
    vslli.w         vr20,   vr20,   2
    // same for columns 4-7
    vadd.w          vr0,    vr1,    vr3
    vadd.w          vr4,    vr5,    vr7
    vadd.w          vr0,    vr0,    vr9
    vadd.w          vr21,   vr0,    vr4
    vslli.w         vr21,   vr21,   2
    // 3 * (sum of the four diagonals), computed as 2*x + x, columns 0-3
    vadd.w          vr12,   vr10,   vr12
    vadd.w          vr16,   vr14,   vr16
    vadd.w          vr22,   vr12,   vr16
    vslli.w         vr23,   vr22,   1
    vadd.w          vr22,   vr23,   vr22
    // same for columns 4-7
    vadd.w          vr11,   vr11,   vr13
    vadd.w          vr15,   vr15,   vr17
    vadd.w          vr0,    vr11,   vr15
    vslli.w         vr23,   vr0,    1
    vadd.w          vr23,   vr23,   vr0
    // vr20 / vr21 = weighted sumsq term ("b") for columns 0-3 / 4-7
    vadd.w          vr20,   vr20,   vr22
    vadd.w          vr21,   vr21,   vr23

    // ---- sum: same neighbourhood on 16-bit entries ----
    vld             vr0,    t3,     0       // P[i - REST_UNIT_STRIDE]
    vld             vr1,    t4,     -2      // P[i - 1]
    vld             vr2,    t4,     0       // P[i]
    vld             vr3,    t4,     2       // P[i + 1]
    vld             vr4,    t5,     0       // P[i + REST_UNIT_STRIDE]
    vld             vr5,    t3,     -2      // P[i - 1 - REST_UNIT_STRIDE]
    vld             vr6,    t5,     -2      // P[i - 1 + REST_UNIT_STRIDE]
    vld             vr7,    t3,     2       // P[i + 1 - REST_UNIT_STRIDE]
    vld             vr8,    t5,     2       // P[i + 1 + REST_UNIT_STRIDE]
    // Widening adds split the lanes into even and odd columns; the
    // interleaves below put them back into column order.
    vaddwev.w.h     vr9,    vr0,    vr1
    vaddwod.w.h     vr10,   vr0,    vr1
    vaddwev.w.h     vr11,   vr2,    vr3
    vaddwod.w.h     vr12,   vr2,    vr3
    vadd.w          vr9,    vr11,   vr9
    vadd.w          vr10,   vr12,   vr10
    vilvl.w         vr11,   vr10,   vr9     // 0 1 2 3
    vilvh.w         vr12,   vr10,   vr9     // 4 5 6 7
    vsllwil.w.h     vr0,    vr4,    0       // row below, sign-extended, 0 1 2 3
    vexth.w.h       vr1,    vr4             //                           4 5 6 7
    vadd.w          vr0,    vr11,   vr0
    vadd.w          vr1,    vr12,   vr1
    vslli.w         vr0,    vr0,    2       // 4 * (edges + centre)
    vslli.w         vr1,    vr1,    2
    vaddwev.w.h     vr9,    vr5,    vr6
    vaddwod.w.h     vr10,   vr5,    vr6
    vaddwev.w.h     vr11,   vr7,    vr8
    vaddwod.w.h     vr12,   vr7,    vr8
    vadd.w          vr9,    vr11,   vr9
    vadd.w          vr10,   vr12,   vr10
    vilvl.w         vr13,   vr10,   vr9     // diagonals, 0 1 2 3
    vilvh.w         vr14,   vr10,   vr9     //            4 5 6 7
    vslli.w         vr15,   vr13,   1
    vslli.w         vr16,   vr14,   1
    vadd.w          vr15,   vr13,   vr15    // 3 * diagonals
    vadd.w          vr16,   vr14,   vr16
    // vr22 / vr23 = weighted sum term ("a") for columns 0-3 / 4-7
    vadd.w          vr22,   vr0,    vr15
    vadd.w          vr23,   vr1,    vr16

    // ---- combine with the source pixels ----
    vld             vr0,    t6,     0       // src (only the low 8 bytes are used)
    vsllwil.hu.bu   vr0,    vr0,    0       // 8 pixels -> u16
    vsllwil.wu.hu   vr1,    vr0,    0       // pixels 0-3 -> u32
    vexth.wu.hu     vr2,    vr0             // pixels 4-7 -> u32
    vmadd.w         vr20,   vr22,   vr1     // b + a * src
    vmadd.w         vr21,   vr23,   vr2
    // Rounding shift by 9 and saturating narrow to 16 bit; vr20 ends up in
    // the low half and vr21 in the high half of vr21.
    vssrlrni.h.w    vr21,   vr20,   9
    vst             vr21,   t8,     0

    // Advance all cursors by 8 columns.
    addi.d          t0,     t0,     32
    addi.d          t1,     t1,     32
    addi.d          t2,     t2,     32
    addi.d          t3,     t3,     16
    addi.d          t4,     t4,     16
    addi.d          t5,     t5,     16
    addi.d          t6,     t6,     8
    addi.d          t8,     t8,     16
    addi.w          t7,     t7,     -8
    blt             zero,   t7,     .LBS3SGF_V_W

    // Next row: dst rows are 384 16-bit entries apart.
    addi.d          a0,     a0,     384*2
    addi.d          a1,     a1,     REST_UNIT_STRIDE
    addi.d          a2,     a2,     REST_UNIT_STRIDE<<2
    addi.d          a3,     a3,     REST_UNIT_STRIDE<<1
    addi.w          a5,     a5,     -1
    bnez            a5,     .LBS3SGF_V_H
endfunc

function boxsum3_sgf_v_8bpc_lasx
/*
 * Body of boxsum3_sgf_v_8bpc_lasx, the 256-bit (LASX) counterpart of
 * boxsum3_sgf_v_8bpc_lsx.
 *
 * Register use, as read from the code below:
 *   a0 = dst, a1 = src, a2 = sumsq (32-bit entries), a3 = sum (16-bit
 *   entries), a4 = w, a5 = h
 *
 * Per output: 4 * (centre + four edge neighbours) + 3 * (four diagonal
 * neighbours) is formed over sumsq and over sum; the stored value is
 * (weighted_sumsq + weighted_sum * src[i]) >> 9 with rounding, narrowed to
 * 16 bit with saturation.  Sixteen outputs are produced per inner iteration,
 * so the width is effectively rounded up to a multiple of 16.  Both loops
 * are bottom-tested.
 */
    // Move every input pointer to the first filtered position.
    addi.d          a1,     a1,     (3*REST_UNIT_STRIDE+3)      // src: 3 rows + 3 pixels
    // sumsq: 2 rows + 3 int32 entries; the total does not fit the signed
    // 12-bit immediate of addi.d, hence two additions.
    addi.d          a2,     a2,     REST_UNIT_STRIDE<<2
    addi.d          a2,     a2,     (REST_UNIT_STRIDE<<2)+12
    // sum: 2 rows + 3 16-bit entries
    addi.d          a3,     a3,     REST_UNIT_STRIDE<<2
    addi.d          a3,     a3,     6

.LBS3SGF_V_H_LASX:
    // sumsq (int32): row above / current row / row below
    addi.d          t0,     a2,     -(REST_UNIT_STRIDE<<2)
    addi.d          t1,     a2,     0
    addi.d          t2,     a2,     REST_UNIT_STRIDE<<2
    // sum (16-bit): row above / current row / row below
    addi.d          t3,     a3,     -(REST_UNIT_STRIDE<<1)
    addi.d          t4,     a3,     0
    addi.d          t5,     a3,     REST_UNIT_STRIDE<<1
    // per-row cursors
    addi.d          t6,     a1,     0                           // src
    addi.w          t7,     a4,     0                           // columns left
    addi.d          t8,     a0,     0                           // dst

.LBS3SGF_V_W_LASX:
    // ---- sumsq: edge neighbours and centre, columns 0-7 / 8-15 ----
    xvld            xr0,    t0,     0       // P[i - REST_UNIT_STRIDE]
    xvld            xr1,    t0,     32
    xvld            xr2,    t1,     -4      // P[i - 1]
    xvld            xr3,    t1,     28
    xvld            xr4,    t2,     0       // P[i + REST_UNIT_STRIDE]
    xvld            xr5,    t2,     32
    xvld            xr6,    t1,     0       // P[i]
    xvld            xr7,    t1,     32
    xvld            xr8,    t1,     4       // P[i + 1]
    xvld            xr9,    t1,     36

    // ---- sumsq: diagonal neighbours ----
    xvld            xr10,   t0,     -4      // P[i - 1 - REST_UNIT_STRIDE]
    xvld            xr11,   t0,     28
    xvld            xr12,   t2,     -4      // P[i - 1 + REST_UNIT_STRIDE]
    xvld            xr13,   t2,     28
    xvld            xr14,   t0,     4       // P[i + 1 - REST_UNIT_STRIDE]
    xvld            xr15,   t0,     36
    xvld            xr16,   t2,     4       // P[i + 1 + REST_UNIT_STRIDE]
    xvld            xr17,   t2,     36

    // 4 * (up + left + centre + right + down), columns 0-7
    xvadd.w         xr0,    xr2,    xr0
    xvadd.w         xr4,    xr6,    xr4
    xvadd.w         xr0,    xr0,    xr8
    xvadd.w         xr20,   xr0,    xr4
    xvslli.w        xr20,   xr20,   2
    // same for columns 8-15
    xvadd.w         xr0,    xr1,    xr3
    xvadd.w         xr4,    xr5,    xr7
    xvadd.w         xr0,    xr0,    xr9
    xvadd.w         xr21,   xr0,    xr4
    xvslli.w        xr21,   xr21,   2
    // 3 * (sum of the four diagonals), computed as 2*x + x, columns 0-7
    xvadd.w         xr12,   xr10,   xr12
    xvadd.w         xr16,   xr14,   xr16
    xvadd.w         xr22,   xr12,   xr16
    xvslli.w        xr23,   xr22,   1
    xvadd.w         xr22,   xr23,   xr22
    // same for columns 8-15
    xvadd.w         xr11,   xr11,   xr13
    xvadd.w         xr15,   xr15,   xr17
    xvadd.w         xr0,    xr11,   xr15
    xvslli.w        xr23,   xr0,    1
    xvadd.w         xr23,   xr23,   xr0
    // xr20 / xr21 = weighted sumsq term ("b") for columns 0-7 / 8-15
    xvadd.w         xr20,   xr20,   xr22
    xvadd.w         xr21,   xr21,   xr23

    // ---- sum: same neighbourhood on 16-bit entries ----
    xvld            xr0,    t3,     0       // P[i - REST_UNIT_STRIDE]
    xvld            xr1,    t4,     -2      // P[i - 1]
    xvld            xr2,    t4,     0       // P[i]
    xvld            xr3,    t4,     2       // P[i + 1]
    xvld            xr4,    t5,     0       // P[i + REST_UNIT_STRIDE]
    xvld            xr5,    t3,     -2      // P[i - 1 - REST_UNIT_STRIDE]
    xvld            xr6,    t5,     -2      // P[i - 1 + REST_UNIT_STRIDE]
    xvld            xr7,    t3,     2       // P[i + 1 - REST_UNIT_STRIDE]
    xvld            xr8,    t5,     2       // P[i + 1 + REST_UNIT_STRIDE]

    // Widening adds split the lanes into even and odd columns; the
    // interleaves work per 128-bit half, which gives the column layout
    // noted on each line.
    xvaddwev.w.h    xr9,    xr0,    xr1
    xvaddwod.w.h    xr10,   xr0,    xr1
    xvaddwev.w.h    xr11,   xr2,    xr3
    xvaddwod.w.h    xr12,   xr2,    xr3
    xvadd.w         xr9,    xr11,   xr9     // 0 2 4 6 8 10 12 14
    xvadd.w         xr10,   xr12,   xr10    // 1 3 5 7 9 11 13 15
    xvilvl.w        xr11,   xr10,   xr9     // 0 1 2 3 8 9 10 11
    xvilvh.w        xr12,   xr10,   xr9     // 4 5 6 7 12 13 14 15
    xvsllwil.w.h    xr0,    xr4,    0       // 0 1 2 3 8 9 10 11
    xvexth.w.h      xr1,    xr4             // 4 5 6 7 12 13 14 15

    xvadd.w         xr0,    xr11,   xr0
    xvadd.w         xr1,    xr12,   xr1
    xvslli.w        xr0,    xr0,    2       // 4 * (edges + centre)
    xvslli.w        xr1,    xr1,    2

    xvaddwev.w.h    xr9,    xr5,    xr6
    xvaddwod.w.h    xr10,   xr5,    xr6
    xvaddwev.w.h    xr11,   xr7,    xr8
    xvaddwod.w.h    xr12,   xr7,    xr8
    xvadd.w         xr9,    xr11,   xr9
    xvadd.w         xr10,   xr12,   xr10
    xvilvl.w        xr13,   xr10,   xr9     // 0 1 2 3 8 9 10 11
    xvilvh.w        xr14,   xr10,   xr9     // 4 5 6 7 12 13 14 15

    xvslli.w        xr15,   xr13,   1
    xvslli.w        xr16,   xr14,   1
    xvadd.w         xr15,   xr13,   xr15    // 3 * diagonals
    xvadd.w         xr16,   xr14,   xr16
    // Weighted sum term ("a").  Naming the 128-bit halves A B C D:
    xvadd.w         xr22,   xr0,    xr15    // A B  (columns 0-3, 8-11)
    xvadd.w         xr23,   xr1,    xr16    // C D  (columns 4-7, 12-15)

    // ---- combine with the source pixels ----
    vld             vr0,    t6,     0       // 16 src pixels
    vilvh.d         vr2,    vr0,    vr0     // pixels 8-15 moved to the low half
    vext2xv.wu.bu   xr1,    xr0             // pixels 0-7  -> u32
    vext2xv.wu.bu   xr2,    xr2             // pixels 8-15 -> u32
    // Regroup the halves so that they line up with xr20 / xr21, which hold
    // columns 0-7 / 8-15.
    xvor.v          xr15,   xr22,   xr22    // copy of A B
    xvpermi.q       xr22,   xr23,   0b00000010  // A C  (columns 0-7)
    xvpermi.q       xr23,   xr15,   0b00110001  // B D  (columns 8-15)
    xvmadd.w        xr20,   xr22,   xr1     // b + a * src
    xvmadd.w        xr21,   xr23,   xr2
    // Rounding shift by 9 and saturating narrow to 16 bit.  The narrowing
    // works per 128-bit half, so the 64-bit groups come out in the order
    // 0-3, 8-11, 4-7, 12-15; xvpermi.d swaps the two middle groups.
    xvssrlrni.h.w   xr21,   xr20,   9
    xvpermi.d       xr22,   xr21,   0b11011000
    xvst            xr22,   t8,     0

    // Advance all cursors by 16 columns.
    addi.d          t0,     t0,     64
    addi.d          t1,     t1,     64
    addi.d          t2,     t2,     64
    addi.d          t3,     t3,     32
    addi.d          t4,     t4,     32
    addi.d          t5,     t5,     32
    addi.d          t6,     t6,     16
    addi.d          t8,     t8,     32
    addi.w          t7,     t7,     -16
    blt             zero,   t7,     .LBS3SGF_V_W_LASX

    // Next row: dst rows are 384 16-bit entries apart.
    addi.d          a0,     a0,     384*2
    addi.d          a1,     a1,     REST_UNIT_STRIDE
    addi.d          a2,     a2,     REST_UNIT_STRIDE<<2
    addi.d          a3,     a3,     REST_UNIT_STRIDE<<1
    addi.w          a5,     a5,     -1
    bnez            a5,     .LBS3SGF_V_H_LASX
endfunc

#define FILTER_OUT_STRIDE (384)

/*
sgr_3x3_finish_c(const
pixel *p, const ptrdiff_t stride,
                 const int16_t *dst, const int w1,
                 const int w, const int h);

Register use, as read from the code below:
  a0 = p, a1 = stride, a2 = dst, a3 = w1, a4 = w, a5 = h

Each pixel of p is rewritten in place.  With u = p[i] << 4:

    v    = (u << 7) + w1 * (dst[i] - u)
    p[i] = v >> 11, rounded, saturated to 8 bit unsigned

Rows of p are `stride` bytes apart, rows of dst FILTER_OUT_STRIDE 16-bit
entries apart.  Full groups of 8 pixels are stored with one 64-bit store;
the remaining w % 8 pixels are computed as a full group and stored one byte
at a time.  When w < 8 a separate row loop handles only that remainder.

NOTE: every group is loaded with 128-bit vld from both p and dst, including
the remainder group, so both buffers must be readable beyond the last pixel
of a row.  The row loops are bottom-tested, so at least one row is processed.
*/

/*
 * Filter one group of 8 pixels.
 *   \pix  : address of the 8 pixels
 *   \coef : address of the 8 matching 16-bit dst entries
 * Reads vr3 (w1 in every 32-bit lane), overwrites vr0-vr2 and vr4-vr9.
 * The 8 result bytes are left in the low 64 bits of vr7.
 */
.macro SGR3X3_FINISH_8PX_LSX pix, coef
    vld             vr0,    \pix,   0
    vld             vr1,    \coef,  0
    vsllwil.hu.bu   vr2,    vr0,    4       // u = p << 4, 8 x u16
    vsllwil.wu.hu   vr4,    vr2,    0       // u, lanes 0-3
    vexth.wu.hu     vr5,    vr2             // u, lanes 4-7
    vslli.w         vr6,    vr4,    7       // u << 7
    vslli.w         vr7,    vr5,    7
    vsllwil.w.h     vr8,    vr1,    0       // dst, lanes 0-3
    vexth.w.h       vr9,    vr1             // dst, lanes 4-7
    vsub.w          vr8,    vr8,    vr4     // dst - u
    vsub.w          vr9,    vr9,    vr5
    vmadd.w         vr6,    vr8,    vr3     // v, lanes 0-3
    vmadd.w         vr7,    vr9,    vr3     // v, lanes 4-7
    vssrarni.hu.w   vr7,    vr6,    11      // rounding >> 11, saturate to u16
    vssrlni.bu.h    vr7,    vr7,    0       // saturate to u8
.endm

function sgr_3x3_finish_8bpc_lsx
    vreplgr2vr.w    vr3,    a3                      // w1 in every lane
    andi            t4,     a4,     0x7             // t4 = w % 8
    sub.w           t5,     a4,     t4              // t5 = w - w % 8

    beq             zero,   t5,     .LSGR3X3_REM    // fewer than 8 columns

    // ---- rows with at least one full group of 8 ----
.LSGR3X3_H:
    addi.d          t0,     a0,     0               // pixel cursor
    addi.d          t1,     a2,     0               // dst cursor
    addi.w          t2,     t5,     0               // columns left in full groups
    andi            t4,     a4,     0x7             // reloaded: counted down below
.LSGR3X3_W:
    SGR3X3_FINISH_8PX_LSX t0, t1
    vstelm.d        vr7,    t0,     0,      0       // store 8 pixels
    addi.d          t0,     t0,     8
    addi.d          t1,     t1,     16
    addi.d          t2,     t2,     -8
    bne             zero,   t2,     .LSGR3X3_W

    beq             t4,     zero,   .LSGR3X3_NOREM

    // Remainder of this row: compute 8, store only t4 of them.
    SGR3X3_FINISH_8PX_LSX t0, t1

.LSGR3X3_ST:
    vstelm.b        vr7,    t0,     0,      0       // store lowest byte
    addi.d          t0,     t0,     1
    vbsrl.v         vr7,    vr7,    1               // shift next byte down
    addi.w          t4,     t4,     -1
    bnez            t4,     .LSGR3X3_ST

.LSGR3X3_NOREM:
    addi.w          a5,     a5,     -1
    add.d           a0,     a0,     a1
    addi.d          a2,     a2,     (FILTER_OUT_STRIDE<<1)
    bnez            a5,     .LSGR3X3_H
    b               .LSGR3X3_END

    // ---- rows narrower than 8: remainder only ----
.LSGR3X3_REM:
    andi            t4,     a4,     0x7
    addi.d          t0,     a0,     0
    SGR3X3_FINISH_8PX_LSX t0, a2

.LSGR3X3_REM_ST:
    vstelm.b        vr7,    t0,     0,      0       // store lowest byte
    addi.d          t0,     t0,     1
    vbsrl.v         vr7,    vr7,    1               // shift next byte down
    addi.w          t4,     t4,     -1
    bnez            t4,     .LSGR3X3_REM_ST
    addi.w          a5,     a5,     -1
    add.d           a0,     a0,     a1
    addi.d          a2,     a2,     (FILTER_OUT_STRIDE<<1)
    bnez            a5,     .LSGR3X3_REM

.LSGR3X3_END:
endfunc
900*c0909341SAndroid Build Coastguard Worker 901*c0909341SAndroid Build Coastguard Worker/* 902*c0909341SAndroid Build Coastguard Workervoid boxsum5(int32_t *sumsq, coef *sum, 903*c0909341SAndroid Build Coastguard Worker const pixel *const src, 904*c0909341SAndroid Build Coastguard Worker const int w, const int h) 905*c0909341SAndroid Build Coastguard Worker*/ 906*c0909341SAndroid Build Coastguard Workerfunction boxsum5_h_8bpc_lsx 907*c0909341SAndroid Build Coastguard Worker addi.w a4, a4, -4 908*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, REST_UNIT_STRIDE<<2 909*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE<<1 910*c0909341SAndroid Build Coastguard Worker li.w t6, 1 911*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_H_H: 912*c0909341SAndroid Build Coastguard Worker addi.w t3, a3, 0 913*c0909341SAndroid Build Coastguard Worker addi.d t2, a2, 0 914*c0909341SAndroid Build Coastguard Worker addi.d t0, a0, 0 915*c0909341SAndroid Build Coastguard Worker addi.d t1, a1, 0 916*c0909341SAndroid Build Coastguard Worker 917*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_H_W: 918*c0909341SAndroid Build Coastguard Worker vld vr0, t2, 0 // a 919*c0909341SAndroid Build Coastguard Worker vld vr1, t2, REST_UNIT_STRIDE // b 920*c0909341SAndroid Build Coastguard Worker vld vr2, t2, REST_UNIT_STRIDE<<1 
// c 921*c0909341SAndroid Build Coastguard Worker vld vr3, t2, REST_UNIT_STRIDE*3 // d 922*c0909341SAndroid Build Coastguard Worker vld vr4, t2, REST_UNIT_STRIDE<<2 // e 923*c0909341SAndroid Build Coastguard Worker 924*c0909341SAndroid Build Coastguard Worker vilvl.b vr5, vr1, vr0 925*c0909341SAndroid Build Coastguard Worker vilvh.b vr6, vr1, vr0 926*c0909341SAndroid Build Coastguard Worker vilvl.b vr7, vr3, vr2 927*c0909341SAndroid Build Coastguard Worker vilvh.b vr8, vr3, vr2 928*c0909341SAndroid Build Coastguard Worker //sum_v 929*c0909341SAndroid Build Coastguard Worker vhaddw.hu.bu vr9, vr5, vr5 // 0 1 2 3 4 5 6 7 930*c0909341SAndroid Build Coastguard Worker vhaddw.hu.bu vr10, vr6, vr6 // 8 9 10 11 12 13 14 15 a+b 931*c0909341SAndroid Build Coastguard Worker vhaddw.hu.bu vr11, vr7, vr7 932*c0909341SAndroid Build Coastguard Worker vhaddw.hu.bu vr12, vr8, vr8 933*c0909341SAndroid Build Coastguard Worker vadd.h vr9, vr9, vr11 934*c0909341SAndroid Build Coastguard Worker vadd.h vr10, vr10, vr12 // a + b + c + d 935*c0909341SAndroid Build Coastguard Worker vsllwil.hu.bu vr11, vr4, 0 936*c0909341SAndroid Build Coastguard Worker vexth.hu.bu vr12, vr4 937*c0909341SAndroid Build Coastguard Worker vadd.h vr9, vr9, vr11 938*c0909341SAndroid Build Coastguard Worker vadd.h vr10, vr10, vr12 939*c0909341SAndroid Build Coastguard Worker vst vr9, t1, 0 940*c0909341SAndroid Build Coastguard Worker vst vr10, t1, 16 941*c0909341SAndroid Build Coastguard Worker addi.d t1, t1, 32 942*c0909341SAndroid Build Coastguard Worker 943*c0909341SAndroid Build Coastguard Worker // sumsq 944*c0909341SAndroid Build Coastguard Worker vmulwev.h.bu vr9, vr5, vr5 // a*a 0 1 2 3 4 5 6 7 945*c0909341SAndroid Build Coastguard Worker vmulwev.h.bu vr10, vr6, vr6 // a*a 8 9 10 11 12 13 14 15 946*c0909341SAndroid Build Coastguard Worker vmulwod.h.bu vr13, vr5, vr5 // b*b 0 1 2 3 4 5 6 7 947*c0909341SAndroid Build Coastguard Worker vmulwod.h.bu vr14, vr6, vr6 // b*b 8 9 10 11 12 13 14 15 
948*c0909341SAndroid Build Coastguard Worker vmulwev.h.bu vr15, vr7, vr7 // c*c 0 1 2 3 4 5 6 7 949*c0909341SAndroid Build Coastguard Worker vmulwev.h.bu vr16, vr8, vr8 // c*c 8 9 10 11 12 13 14 15 950*c0909341SAndroid Build Coastguard Worker vmulwod.h.bu vr17, vr7, vr7 // d*d 0 1 2 3 4 5 6 7 951*c0909341SAndroid Build Coastguard Worker vmulwod.h.bu vr18, vr8, vr8 // d*d 8 9 10 11 12 13 14 15 952*c0909341SAndroid Build Coastguard Worker vaddwev.w.hu vr5, vr9, vr13 // 0 2 4 6 953*c0909341SAndroid Build Coastguard Worker vaddwod.w.hu vr6, vr9, vr13 // 1 3 5 7 954*c0909341SAndroid Build Coastguard Worker vaddwev.w.hu vr7, vr10, vr14 // 8 10 12 14 955*c0909341SAndroid Build Coastguard Worker vaddwod.w.hu vr8, vr10, vr14 // 9 11 13 15 a + b 956*c0909341SAndroid Build Coastguard Worker vaddwev.w.hu vr19, vr15, vr17 // 0 2 4 6 957*c0909341SAndroid Build Coastguard Worker vaddwod.w.hu vr20, vr15, vr17 // 1 3 5 7 958*c0909341SAndroid Build Coastguard Worker vaddwev.w.hu vr21, vr16, vr18 // 8 10 12 14 959*c0909341SAndroid Build Coastguard Worker vaddwod.w.hu vr22, vr16, vr18 // 9 11 13 15 c + d 960*c0909341SAndroid Build Coastguard Worker vadd.w vr5, vr5, vr19 961*c0909341SAndroid Build Coastguard Worker vadd.w vr6, vr6, vr20 962*c0909341SAndroid Build Coastguard Worker vadd.w vr7, vr7, vr21 963*c0909341SAndroid Build Coastguard Worker vadd.w vr8, vr8, vr22 964*c0909341SAndroid Build Coastguard Worker vmaddwev.w.hu vr5, vr11, vr11 965*c0909341SAndroid Build Coastguard Worker vmaddwod.w.hu vr6, vr11, vr11 966*c0909341SAndroid Build Coastguard Worker vmaddwev.w.hu vr7, vr12, vr12 967*c0909341SAndroid Build Coastguard Worker vmaddwod.w.hu vr8, vr12, vr12 968*c0909341SAndroid Build Coastguard Worker vilvl.w vr19, vr6, vr5 969*c0909341SAndroid Build Coastguard Worker vilvh.w vr20, vr6, vr5 970*c0909341SAndroid Build Coastguard Worker vilvl.w vr21, vr8, vr7 971*c0909341SAndroid Build Coastguard Worker vilvh.w vr22, vr8, vr7 972*c0909341SAndroid Build Coastguard Worker 
973*c0909341SAndroid Build Coastguard Worker vst vr19, t0, 0 974*c0909341SAndroid Build Coastguard Worker vst vr20, t0, 16 975*c0909341SAndroid Build Coastguard Worker vst vr21, t0, 32 976*c0909341SAndroid Build Coastguard Worker vst vr22, t0, 48 977*c0909341SAndroid Build Coastguard Worker addi.d t0, t0, 64 978*c0909341SAndroid Build Coastguard Worker addi.d t2, t2, 16 979*c0909341SAndroid Build Coastguard Worker addi.w t3, t3, -16 980*c0909341SAndroid Build Coastguard Worker blt zero, t3, .LBOXSUM5_H_W 981*c0909341SAndroid Build Coastguard Worker 982*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, REST_UNIT_STRIDE<<2 983*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE<<1 984*c0909341SAndroid Build Coastguard Worker addi.d a2, a2, REST_UNIT_STRIDE 985*c0909341SAndroid Build Coastguard Worker addi.d a4, a4, -1 986*c0909341SAndroid Build Coastguard Worker bnez a4, .LBOXSUM5_H_H 987*c0909341SAndroid Build Coastguard Workerendfunc 988*c0909341SAndroid Build Coastguard Worker 989*c0909341SAndroid Build Coastguard Worker/* 990*c0909341SAndroid Build Coastguard Workervoid boxsum5_h(int32_t *sumsq, coef *sum, 991*c0909341SAndroid Build Coastguard Worker const int w, const int h) 992*c0909341SAndroid Build Coastguard Worker*/ 993*c0909341SAndroid Build Coastguard Workerfunction boxsum5_v_8bpc_lsx 994*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, (REST_UNIT_STRIDE<<2) 995*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, (REST_UNIT_STRIDE<<1) 996*c0909341SAndroid Build Coastguard Worker addi.w a3, a3, -4 997*c0909341SAndroid Build Coastguard Worker addi.w a2, a2, -4 998*c0909341SAndroid Build Coastguard Worker 999*c0909341SAndroid Build Coastguard Worker.LBOXSUM5_V_H: 1000*c0909341SAndroid Build Coastguard Worker addi.w t3, a2, 0 1001*c0909341SAndroid Build Coastguard Worker addi.d t0, a0, 0 1002*c0909341SAndroid Build Coastguard Worker addi.d t1, a1, 0 1003*c0909341SAndroid Build Coastguard Worker addi.d t2, a0, 8 
// Per-row setup of boxsum5_v_8bpc_lsx, continued.
    addi.d        t3, a1, 4                      // sum write cursor (2 elements in)
    addi.d        t4, a2, 0                      // columns left in this row

    // First 8 columns of the row: nothing has been overwritten yet.
    vld           vr0, t1, 0                     // a 0 1 2 3 4 5 6 7
    vld           vr1, t1, 2                     // b 1 2 3 4 5 6 7 8
    vld           vr2, t1, 4                     // c 2
    vld           vr3, t1, 6                     // d 3
    vld           vr4, t1, 8                     // e 4 5 6 7 8 9 10 11
    vadd.h        vr5, vr0, vr1
    vadd.h        vr6, vr2, vr3
    vpickve2gr.w  t5, vr4, 2                     // keep inputs 8, 9: the store below overwrites them
    vadd.h        vr5, vr5, vr6
    vadd.h        vr5, vr5, vr4
    vst           vr5, t3, 0

    vld           vr0, t0, 0                     // 0 1 2 3    a
    vld           vr1, t0, 4                     // 1 2 3 4    b
    vld           vr2, t0, 8                     // 2 3 4 5    c
    vld           vr3, t0, 12                    // 3 4 5 6    d
    vld           vr4, t0, 16                    // 4 5 6 7    e  a
    vld           vr5, t0, 20                    // 5 6 7 8       b
    vld           vr6, t0, 24                    // 6 7 8 9       c
    vld           vr7, t0, 28                    // 7 8 9 10      d
    vld           vr8, t0, 32                    // 8 9 10 11     e

    vadd.w        vr9, vr0, vr1
    vadd.w        vr10, vr2, vr3
    vadd.w        vr9, vr9, vr10
    vadd.w        vr9, vr9, vr4
    vadd.w        vr10, vr4, vr5
    vadd.w        vr11, vr6, vr7
    vadd.w        vr10, vr10, vr8
    vadd.w        vr10, vr10, vr11
    vst           vr9, t2, 0
    vst           vr10, t2, 16

    addi.d        t3, t3, 16
    addi.d        t1, t1, 16
    addi.d        t0, t0, 32
    addi.d        t2, t2, 32
    addi.w        t4, t4, -8
    ble           t4, zero, .LBOXSUM5_V_H1

    // Remaining columns. The previous stores already replaced the first two
    // inputs of this group, so they are patched back from saved copies
    // (t5 for `sum`, the previous vr8 for `sumsq`). Index comments are for
    // the first pass through this loop.
.LBOXSUM5_V_W:
    vld           vr0, t1, 0                     // a 0 1 2 3 4 5 6 7
    vld           vr1, t1, 2                     // b 1 2 3 4 5 6 7 8
    vld           vr2, t1, 4                     // c 2
    vld           vr3, t1, 6                     // d 3
    vld           vr4, t1, 8                     // e 4 5 6 7 8 9 10 11
    vinsgr2vr.w   vr0, t5, 0                     // restore original elements 0, 1 of a
    vpickve2gr.w  t5, vr4, 2                     // save the pair the next store overwrites
    vextrins.h    vr1, vr0, 0x01                 // b[0] = a[1] (original value)
    vadd.h        vr5, vr0, vr1
    vadd.h        vr6, vr2, vr3
    vadd.h        vr5, vr5, vr6
    vadd.h        vr5, vr5, vr4
    vst           vr5, t3, 0

    vaddi.hu      vr0, vr8, 0                    // 8 9 10 11     a (copy of previous e)
    vld           vr1, t0, 4                     // 9 10 11 12    b
    vld           vr2, t0, 8                     // 10 11 12 13   c
    vld           vr3, t0, 12                    // 11 12 13 14   d
    vld           vr4, t0, 16                    // 12 13 14 15   e  a
    vld           vr5, t0, 20                    // 13 14 15 16      b
    vld           vr6, t0, 24                    // 14 15 16 17      c
    vld           vr7, t0, 28                    // 15 16 17 18      d
    vld           vr8, t0, 32                    // 16 17 18 19      e
    vextrins.w    vr1, vr0, 0x01                 // b[0] = a[1] (original value)
    vadd.w        vr9, vr0, vr1
    vadd.w        vr10, vr2, vr3
    vadd.w        vr9, vr9, vr10
    vadd.w        vr9, vr9, vr4
    vadd.w        vr10, vr4, vr5
    vadd.w        vr11, vr6, vr7
    vadd.w        vr10, vr10, vr8
    vadd.w        vr10, vr10, vr11
    vst           vr9, t2, 0
    vst           vr10, t2, 16

    addi.d        t3, t3, 16
    addi.d        t1, t1, 16
    addi.d        t0, t0, 32
    addi.d        t2, t2, 32
    addi.w        t4, t4, -8
    blt           zero, t4, .LBOXSUM5_V_W

.LBOXSUM5_V_H1:
    addi.d        a1, a1, REST_UNIT_STRIDE<<1    // next row of sum
    addi.d        a0, a0, REST_UNIT_STRIDE<<2    // next row of sumsq
    addi.w        a3, a3, -1
    bnez          a3, .LBOXSUM5_V_H
endfunc

/*
selfguided_filter(int32_t *sumsq, coef *sum,
                  const int w, const int h,
                  const unsigned s)

Coefficient step of the 5x5 self-guided filter (8 bpc), done in place.

  a0 = sumsq, a1 = sum, a2 = w, a3 = h, a4 = s

With AA = sumsq + REST_UNIT_STRIDE + 3 and BB = sum + REST_UNIT_STRIDE + 3,
every second row from -1 and every column from -1 (w + 2 columns, rounded up
to a multiple of 8) is rewritten as

    p     = max(AA[i] * 25 - BB[i] * BB[i], 0)
    z     = min((p * s + (1 << 19)) >> 20, 255)
    x     = dav1d_sgr_x_by_x[z]
    AA[i] = (x * BB[i] * 164 + (1 << 11)) >> 12
    BB[i] = 256 - x
*/
function boxsum5_sgf_h_8bpc_lsx
    addi.d        a0, a0, REST_UNIT_STRIDE<<2
    addi.d        a0, a0, 12                     // AA
    addi.d        a1, a1, REST_UNIT_STRIDE<<1
    addi.d        a1, a1, 6                      // BB
    la.local      t8, dav1d_sgr_x_by_x
    li.w          t6, 164                        // multiplier named sgr_one_by_x below
    vreplgr2vr.w  vr20, t6
    li.w          t6, 255                        // upper clamp for the table index
    vreplgr2vr.w  vr22, t6
    vaddi.wu      vr21, vr22, 1                  // 256
    vreplgr2vr.w  vr6, a4                        // s
    vldi          vr19, 0x819                    // n: 25 in every 32-bit lane
    addi.w        a2, a2, 2                      // w + 2
    addi.w        a3, a3, 2                      // h + 2

.LBS5SGF_H_H:
    addi.w        t2, a2, 0                      // columns left in this row
    addi.d        t0, a0, -4                     // &AA[-1]
    addi.d        t1, a1, -2                     // &BB[-1]

// Column loop of boxsum5_sgf_h_8bpc_lsx: 8 coefficients per iteration.
// On entry vr19 = n, vr6 = s, vr22 = 255, vr21 = 256, vr20 = 164 and
// t8 = dav1d_sgr_x_by_x, all set up before the row loop.
.LBS5SGF_H_W:
    vld           vr0, t0, 0                     // AA[i]
    vld           vr1, t0, 16
    vld           vr2, t1, 0                     // BB[i]

    vmul.w        vr4, vr0, vr19                 // a * n
    vmul.w        vr5, vr1, vr19                 // a * n
    vsllwil.w.h   vr9, vr2, 0                    // BB[i] widened to 32 bit, lanes 0..3
    vexth.w.h     vr10, vr2                      // BB[i] widened to 32 bit, lanes 4..7
    vmsub.w       vr4, vr9, vr9                  // p = a * n - b * b
    vmsub.w       vr5, vr10, vr10                // p
    vmaxi.w       vr4, vr4, 0
    vmaxi.w       vr5, vr5, 0                    // p = max(p, 0)
    vmul.w        vr4, vr4, vr6                  // p * s
    vmul.w        vr5, vr5, vr6                  // p * s
    vsrlri.w      vr4, vr4, 20
    vsrlri.w      vr5, vr5, 20                   // z (rounded shift)
    vmin.w        vr4, vr4, vr22
    vmin.w        vr5, vr5, vr22                 // z = min(z, 255)

    // load table data: one scalar byte lookup per lane, x = dav1d_sgr_x_by_x[z]
    vpickve2gr.w  t6, vr4, 0
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr7, t7, 0
    vpickve2gr.w  t6, vr4, 1
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr7, t7, 1
    vpickve2gr.w  t6, vr4, 2
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr7, t7, 2
    vpickve2gr.w  t6, vr4, 3
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr7, t7, 3

    vpickve2gr.w  t6, vr5, 0
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr8, t7, 0
    vpickve2gr.w  t6, vr5, 1
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr8, t7, 1
    vpickve2gr.w  t6, vr5, 2
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr8, t7, 2
    vpickve2gr.w  t6, vr5, 3
    ldx.bu        t7, t8, t6
    vinsgr2vr.w   vr8, t7, 3                     // x

    vmul.w        vr9, vr7, vr9                  // x * BB[i]
    vmul.w        vr10, vr8, vr10
    vmul.w        vr9, vr9, vr20                 // x * BB[i] * sgr_one_by_x
    vmul.w        vr10, vr10, vr20
    vsrlri.w      vr9, vr9, 12                   // rounded >> 12
    vsrlri.w      vr10, vr10, 12
    vsub.w        vr7, vr21, vr7                 // 256 - x
    vsub.w        vr8, vr21, vr8
    vpickev.h     vr8, vr8, vr7                  // pack the eight results to 16 bit
    vst           vr9, t0, 0                     // new AA[i]
    vst           vr10, t0, 16
    vst           vr8, t1, 0                     // new BB[i]
    addi.d        t0, t0, 32
    addi.d        t1, t1, 16
    addi.w        t2, t2, -8
    blt           zero, t2, .LBS5SGF_H_W

    // Advance two rows: 2 * stride int32 for AA, 2 * stride int16 for BB.
    addi.d        a0, a0, REST_UNIT_STRIDE<<2
    addi.d        a0, a0, REST_UNIT_STRIDE<<2
    addi.d        a1, a1, REST_UNIT_STRIDE<<2
    addi.w        a3, a3, -2
    blt           zero, a3, .LBS5SGF_H_H
endfunc

/*
selfguided_filter(coef *dst, pixel *src,
                  int32_t *sumsq, coef *sum,
                  const int w, const int h)

Weighting step of the 5x5 self-guided filter (8 bpc).

  a0 = dst, a1 = src, a2 = sumsq (A), a3 = sum (B), a4 = w, a5 = h

After the pointer setup below, src points at row 3, column 3 and A / B at
row 2, column 3 of their REST_UNIT_STRIDE-wide buffers. Rows are produced in
pairs, 8 columns at a time, with 384 coefs between dst rows:

  first row of a pair : weights 6 for the elements directly above and below,
                        5 for the four diagonal ones; result rounded >> 9
  second row of a pair: weights 6 for the centre element, 5 for its left and
                        right neighbours (one row further down); rounded >> 8

In both cases dst = (b + a * src) rounded and saturated to 16 bit, where `a`
is the weighted sum taken from B and `b` the one taken from A. When h is odd
the final row is computed with the first formula alone.
*/
function boxsum5_sgf_v_8bpc_lsx
    addi.d        a1, a1, 3*REST_UNIT_STRIDE+3          // src
    addi.d        a2, a2, (2*REST_UNIT_STRIDE+3)<<1     // A (int32: offset added in two halves)
    addi.d        a2, a2, (2*REST_UNIT_STRIDE+3)<<1
    addi.d        a3, a3, (2*REST_UNIT_STRIDE+3)<<1     // B
    addi.w        a5, a5, -1
    vldi          vr10, 0x806                    // 6 in every 32-bit lane
    vldi          vr11, 0x805                    // 5 in every 32-bit lane
    vldi          vr22, 0x406                    // 6 in every 16-bit lane

.LBS5SGF_V_H:
// Row-pair body of boxsum5_sgf_v_8bpc_lsx. vr10 / vr11 / vr22 hold the
// weights 6 (32 bit), 5 (32 bit) and 6 (16 bit) loaded before this loop.
    addi.d        t0, a0, 0                      // dst, first row of the pair
    addi.d        t1, a1, 0                      // src, first row
    addi.d        t2, a2, 0                      // A, first row
    addi.d        t3, a3, 0                      // B, first row
    addi.w        t4, a4, 0                      // columns left

    addi.d        t6, a1, REST_UNIT_STRIDE       // src, second row
    addi.d        t7, a2, REST_UNIT_STRIDE<<2    // A, second row
    addi.d        t8, a3, REST_UNIT_STRIDE<<1    // B, second row
.LBS5SGF_V_W:
    // a = (above + below) * 6 + (four diagonals) * 5, taken from B
    vld           vr0, t3, -REST_UNIT_STRIDE*2
    vld           vr1, t3, REST_UNIT_STRIDE*2
    vld           vr2, t3, (-REST_UNIT_STRIDE-1)*2
    vld           vr3, t3, (REST_UNIT_STRIDE-1)*2
    vld           vr4, t3, (1-REST_UNIT_STRIDE)*2
    vld           vr5, t3, (1+REST_UNIT_STRIDE)*2
    vaddwev.w.h   vr6, vr0, vr1
    vaddwod.w.h   vr7, vr0, vr1
    vmul.w        vr6, vr6, vr10
    vmul.w        vr7, vr7, vr10
    vaddwev.w.h   vr8, vr2, vr3
    vaddwod.w.h   vr9, vr2, vr3
    vaddwev.w.h   vr12, vr4, vr5
    vaddwod.w.h   vr13, vr4, vr5
    vadd.w        vr8, vr8, vr12
    vadd.w        vr9, vr9, vr13
    vmadd.w       vr6, vr8, vr11
    vmadd.w       vr7, vr9, vr11
    vilvl.w       vr18, vr7, vr6                 // back to natural column order
    vilvh.w       vr19, vr7, vr6
    // b = same weighting, taken from A
    vld           vr0, t2, -REST_UNIT_STRIDE*4
    vld           vr1, t2, -REST_UNIT_STRIDE*4+16
    vld           vr2, t2, REST_UNIT_STRIDE*4
    vld           vr3, t2, REST_UNIT_STRIDE*4+16
    vld           vr4, t2, (-REST_UNIT_STRIDE-1)*4
    vld           vr5, t2, (-REST_UNIT_STRIDE-1)*4+16
    vld           vr8, t2, (REST_UNIT_STRIDE-1)*4
    vld           vr9, t2, (REST_UNIT_STRIDE-1)*4+16
    vld           vr12, t2, (1-REST_UNIT_STRIDE)*4
    vld           vr13, t2, (1-REST_UNIT_STRIDE)*4+16
    vld           vr14, t2, (1+REST_UNIT_STRIDE)*4
    vld           vr15, t2, (1+REST_UNIT_STRIDE)*4+16
    vadd.w        vr0, vr0, vr2                  // 0 1 2 3
    vadd.w        vr1, vr1, vr3                  // 4 5 6 7
    vmul.w        vr20, vr0, vr10
    vmul.w        vr21, vr1, vr10
    vadd.w        vr4, vr4, vr8                  // 0 1 2 3
    vadd.w        vr5, vr5, vr9                  // 4 5 6 7
    vadd.w        vr12, vr12, vr14
    vadd.w        vr13, vr13, vr15
    vadd.w        vr12, vr12, vr4
    vadd.w        vr13, vr13, vr5
    vmadd.w       vr20, vr12, vr11
    vmadd.w       vr21, vr13, vr11
    vld           vr2, t1, 0                     // 8 source pixels (low 8 bytes are used)
    vsllwil.hu.bu vr2, vr2, 0
    vsllwil.wu.hu vr3, vr2, 0
    vexth.wu.hu   vr4, vr2
    vmadd.w       vr20, vr18, vr3                // b + a * src
    vmadd.w       vr21, vr19, vr4
    vssrlrni.h.w  vr21, vr20, 9                  // rounded >> 9, saturated to 16 bit
    vst           vr21, t0, 0

    addi.d        t1, t1, 8
    addi.d        t2, t2, 32
    addi.d        t3, t3, 16

    // a = centre * 6 + (left + right) * 5, taken from B (second row)
    vld           vr0, t8, 0
    vld           vr1, t8, -2
    vld           vr2, t8, 2
    vmulwev.w.h   vr3, vr0, vr22
    vmulwod.w.h   vr4, vr0, vr22
    vaddwev.w.h   vr5, vr1, vr2
    vaddwod.w.h   vr6, vr1, vr2
    vmadd.w       vr3, vr5, vr11
    vmadd.w       vr4, vr6, vr11
    vilvl.w       vr19, vr4, vr3
    vilvh.w       vr20, vr4, vr3
    // b = same weighting, taken from A (second row)
    vld           vr0, t7, 0
    vld           vr1, t7, -4
    vld           vr2, t7, 4
    vld           vr5, t7, 16
    vld           vr6, t7, 12
    vld           vr7, t7, 20
    vmul.w        vr8, vr0, vr10
    vmul.w        vr9, vr5, vr10
    vadd.w        vr12, vr1, vr2
    vadd.w        vr13, vr6, vr7
    vmadd.w       vr8, vr12, vr11
    vmadd.w       vr9, vr13, vr11
    vld           vr2, t6, 0
    vsllwil.hu.bu vr2, vr2, 0
    vsllwil.wu.hu vr3, vr2, 0
    vexth.wu.hu   vr4, vr2
    vmadd.w       vr8, vr19, vr3                 // b + a * src
    vmadd.w       vr9, vr20, vr4
    vssrlrni.h.w  vr9, vr8, 8                    // rounded >> 8, saturated to 16 bit
    vst           vr9, t0, 384*2                 // second dst row of the pair

    addi.d        t0, t0, 16
    addi.d        t8, t8, 16
    addi.d        t7, t7, 32
    addi.d        t6, t6, 8
    addi.w        t4, t4, -8
    blt           zero, t4, .LBS5SGF_V_W

    addi.w        a5, a5, -2
    addi.d        a0, a0, 384*4                  // dst: two rows down
    addi.d        a1, a1, REST_UNIT_STRIDE<<1    // src: two rows down
    addi.d        a2, a2, REST_UNIT_STRIDE<<2    // A: two rows down (added in two steps)
    addi.d        a2, a2, REST_UNIT_STRIDE<<2
    addi.d        a3, a3, REST_UNIT_STRIDE<<2    // B: two rows down
    blt           zero, a5, .LBS5SGF_V_H
    bnez          a5, .LBS5SGF_END               // counter went negative: no single row left
    // Counter reached exactly 0: one last row, first-row formula only.
.LBS5SGF_V_W1:
    // a
    vld           vr0, a3, -REST_UNIT_STRIDE*2
    vld           vr1, a3, REST_UNIT_STRIDE*2
    vld           vr2, a3, (-REST_UNIT_STRIDE-1)*2
    vld           vr3, a3, (REST_UNIT_STRIDE-1)*2
    vld           vr4, a3, (1-REST_UNIT_STRIDE)*2
    vld           vr5, a3, (1+REST_UNIT_STRIDE)*2
    vaddwev.w.h   vr6, vr0, vr1
    vaddwod.w.h   vr7, vr0, vr1
    vmul.w        vr6, vr6, vr10
    vmul.w        vr7, vr7, vr10
    vaddwev.w.h   vr8, vr2, vr3
    vaddwod.w.h   vr9, vr2, vr3
    vaddwev.w.h   vr12, vr4, vr5
    vaddwod.w.h   vr13, vr4, vr5
    vadd.w        vr8, vr8, vr12
    vadd.w        vr9, vr9, vr13
    vmadd.w       vr6, vr8, vr11
    vmadd.w       vr7, vr9, vr11
    vilvl.w       vr18, vr7, vr6
    vilvh.w       vr19, vr7, vr6
    // b
    vld           vr0, a2, -REST_UNIT_STRIDE*4
    vld           vr1, a2, -REST_UNIT_STRIDE*4+16
    vld           vr2, a2, REST_UNIT_STRIDE*4
    vld           vr3, a2, REST_UNIT_STRIDE*4+16
    vld           vr4, a2, (-REST_UNIT_STRIDE-1)*4
    vld           vr5, a2, (-REST_UNIT_STRIDE-1)*4+16
    vld           vr8, a2, (REST_UNIT_STRIDE-1)*4
    vld           vr9, a2, (REST_UNIT_STRIDE-1)*4+16
    vld           vr12, a2, (1-REST_UNIT_STRIDE)*4
    vld           vr13, a2, (1-REST_UNIT_STRIDE)*4+16
    vld           vr14, a2, (1+REST_UNIT_STRIDE)*4
    vld           vr15, a2, (1+REST_UNIT_STRIDE)*4+16
    vadd.w        vr0, vr0, vr2                  // 0 1 2 3
    vadd.w        vr1, vr1, vr3                  // 4 5 6 7
    vmul.w        vr20, vr0, vr10
    vmul.w        vr21, vr1, vr10
    vadd.w        vr4, vr4, vr8                  // 0 1 2 3
    vadd.w        vr5, vr5, vr9                  // 4 5 6 7
    vadd.w        vr12, vr12, vr14
    vadd.w        vr13, vr13, vr15
    vadd.w        vr12, vr12, vr4
    vadd.w        vr13, vr13, vr5
    vmadd.w       vr20, vr12, vr11
    vmadd.w       vr21, vr13, vr11
    vld           vr2, a1, 0
    vsllwil.hu.bu vr2, vr2, 0
    vsllwil.wu.hu vr3, vr2, 0
    vexth.wu.hu   vr4, vr2
    vmadd.w       vr20, vr18, vr3
    vmadd.w       vr21, vr19, vr4
    vssrlrni.h.w  vr21, vr20, 9
    vst           vr21, a0, 0
    addi.d        a3, a3, 16
    addi.d        a2, a2, 32
    addi.d        a1, a1, 8
    addi.d        a0, a0, 16
    addi.w        a4, a4, -8
    blt           zero, a4, .LBS5SGF_V_W1
.LBS5SGF_END:
endfunc

/*
void dav1d_sgr_mix_finish_lsx(uint8_t *p, const ptrdiff_t stride,
                              const int16_t *dst0, const int16_t *dst1,
                              const int w0, const int w1,
                              const int w, const int h);
*/
function sgr_mix_finish_8bpc_lsx
    vreplgr2vr.w  vr3, a4                        // w0
    vreplgr2vr.w  vr13, a5                       // w1
    andi          t4, a6, 0x7
    sub.w         t5, a6, t4

    beq           zero, t5, .LSGRMIX_REM

.LSGRMIX_H:
    addi.d        t0, a0, 0
// NOTE(review): the instruction below is split by the chunk boundary of this
// dump; its remaining operands are the first tokens of the following line.
    addi.d        t1,
a2, 0 // dst0 1410*c0909341SAndroid Build Coastguard Worker addi.d t3, a3, 0 // dst1 1411*c0909341SAndroid Build Coastguard Worker addi.w t2, t5, 0 1412*c0909341SAndroid Build Coastguard Worker andi t4, a6, 0x7 1413*c0909341SAndroid Build Coastguard Worker.LSGRMIX_W: 1414*c0909341SAndroid Build Coastguard Worker vld vr0, t0, 0 1415*c0909341SAndroid Build Coastguard Worker vld vr1, t1, 0 1416*c0909341SAndroid Build Coastguard Worker vld vr10, t3, 0 1417*c0909341SAndroid Build Coastguard Worker vsllwil.hu.bu vr2, vr0, 4 // u 8 h 1418*c0909341SAndroid Build Coastguard Worker vsllwil.wu.hu vr4, vr2, 0 // u 0 1 2 3 1419*c0909341SAndroid Build Coastguard Worker vexth.wu.hu vr5, vr2 // u 4 5 6 7 1420*c0909341SAndroid Build Coastguard Worker vslli.w vr6, vr4, 7 1421*c0909341SAndroid Build Coastguard Worker vslli.w vr7, vr5, 7 1422*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr8, vr1, 0 // dst0 1423*c0909341SAndroid Build Coastguard Worker vexth.w.h vr9, vr1 // dst0 1424*c0909341SAndroid Build Coastguard Worker vsub.w vr8, vr8, vr4 1425*c0909341SAndroid Build Coastguard Worker vsub.w vr9, vr9, vr5 1426*c0909341SAndroid Build Coastguard Worker vmadd.w vr6, vr8, vr3 // v 0 - 3 1427*c0909341SAndroid Build Coastguard Worker vmadd.w vr7, vr9, vr3 // v 4 - 7 1428*c0909341SAndroid Build Coastguard Worker 1429*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr11, vr10, 0 // dst1 1430*c0909341SAndroid Build Coastguard Worker vexth.w.h vr12, vr10 // dst1 1431*c0909341SAndroid Build Coastguard Worker vsub.w vr11, vr11, vr4 1432*c0909341SAndroid Build Coastguard Worker vsub.w vr12, vr12, vr5 1433*c0909341SAndroid Build Coastguard Worker vmadd.w vr6, vr11, vr13 1434*c0909341SAndroid Build Coastguard Worker vmadd.w vr7, vr12, vr13 1435*c0909341SAndroid Build Coastguard Worker 1436*c0909341SAndroid Build Coastguard Worker vssrarni.hu.w vr7, vr6, 11 1437*c0909341SAndroid Build Coastguard Worker vssrlni.bu.h vr7, vr7, 0 1438*c0909341SAndroid Build Coastguard Worker vstelm.d 
vr7, t0, 0, 0 1439*c0909341SAndroid Build Coastguard Worker addi.d t0, t0, 8 1440*c0909341SAndroid Build Coastguard Worker addi.d t1, t1, 16 1441*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, 16 1442*c0909341SAndroid Build Coastguard Worker addi.d t2, t2, -8 1443*c0909341SAndroid Build Coastguard Worker bne zero, t2, .LSGRMIX_W 1444*c0909341SAndroid Build Coastguard Worker 1445*c0909341SAndroid Build Coastguard Worker beq t4, zero, .LSGRMIX_W8 1446*c0909341SAndroid Build Coastguard Worker 1447*c0909341SAndroid Build Coastguard Worker vld vr0, t0, 0 1448*c0909341SAndroid Build Coastguard Worker vld vr1, t1, 0 1449*c0909341SAndroid Build Coastguard Worker vld vr10, t3, 0 1450*c0909341SAndroid Build Coastguard Worker vsllwil.hu.bu vr2, vr0, 4 // u 8 h 1451*c0909341SAndroid Build Coastguard Worker vsllwil.wu.hu vr4, vr2, 0 // p 1452*c0909341SAndroid Build Coastguard Worker vexth.wu.hu vr5, vr2 // p 1453*c0909341SAndroid Build Coastguard Worker vslli.w vr6, vr4, 7 1454*c0909341SAndroid Build Coastguard Worker vslli.w vr7, vr5, 7 1455*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr8, vr1, 0 // dst 1456*c0909341SAndroid Build Coastguard Worker vexth.w.h vr9, vr1 // dst 1457*c0909341SAndroid Build Coastguard Worker vsub.w vr8, vr8, vr4 1458*c0909341SAndroid Build Coastguard Worker vsub.w vr9, vr9, vr5 1459*c0909341SAndroid Build Coastguard Worker vmadd.w vr6, vr8, vr3 // v 0 - 3 1460*c0909341SAndroid Build Coastguard Worker vmadd.w vr7, vr9, vr3 // v 4 - 7 1461*c0909341SAndroid Build Coastguard Worker 1462*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr11, vr10, 0 // dst1 1463*c0909341SAndroid Build Coastguard Worker vexth.w.h vr12, vr10 // dst1 1464*c0909341SAndroid Build Coastguard Worker vsub.w vr11, vr11, vr4 1465*c0909341SAndroid Build Coastguard Worker vsub.w vr12, vr12, vr5 1466*c0909341SAndroid Build Coastguard Worker vmadd.w vr6, vr11, vr13 1467*c0909341SAndroid Build Coastguard Worker vmadd.w vr7, vr12, vr13 1468*c0909341SAndroid Build 
Coastguard Worker 1469*c0909341SAndroid Build Coastguard Worker vssrarni.hu.w vr7, vr6, 11 1470*c0909341SAndroid Build Coastguard Worker vssrlni.bu.h vr7, vr7, 0 1471*c0909341SAndroid Build Coastguard Worker 1472*c0909341SAndroid Build Coastguard Worker.LSGRMIX_ST: 1473*c0909341SAndroid Build Coastguard Worker vstelm.b vr7, t0, 0, 0 1474*c0909341SAndroid Build Coastguard Worker addi.d t0, t0, 1 1475*c0909341SAndroid Build Coastguard Worker vbsrl.v vr7, vr7, 1 1476*c0909341SAndroid Build Coastguard Worker addi.w t4, t4, -1 1477*c0909341SAndroid Build Coastguard Worker bnez t4, .LSGRMIX_ST 1478*c0909341SAndroid Build Coastguard Worker 1479*c0909341SAndroid Build Coastguard Worker.LSGRMIX_W8: 1480*c0909341SAndroid Build Coastguard Worker addi.w a7, a7, -1 1481*c0909341SAndroid Build Coastguard Worker add.d a0, a0, a1 1482*c0909341SAndroid Build Coastguard Worker addi.d a2, a2, (FILTER_OUT_STRIDE<<1) 1483*c0909341SAndroid Build Coastguard Worker addi.d a3, a3, (FILTER_OUT_STRIDE<<1) 1484*c0909341SAndroid Build Coastguard Worker bnez a7, .LSGRMIX_H 1485*c0909341SAndroid Build Coastguard Worker b .LSGR_MIX_END 1486*c0909341SAndroid Build Coastguard Worker 1487*c0909341SAndroid Build Coastguard Worker.LSGRMIX_REM: 1488*c0909341SAndroid Build Coastguard Worker andi t4, a6, 0x7 1489*c0909341SAndroid Build Coastguard Worker vld vr0, a0, 0 1490*c0909341SAndroid Build Coastguard Worker vld vr1, a2, 0 1491*c0909341SAndroid Build Coastguard Worker vld vr10, a3, 0 1492*c0909341SAndroid Build Coastguard Worker vsllwil.hu.bu vr2, vr0, 4 // u 8 h 1493*c0909341SAndroid Build Coastguard Worker vsllwil.wu.hu vr4, vr2, 0 // p 1494*c0909341SAndroid Build Coastguard Worker vexth.wu.hu vr5, vr2 // p 1495*c0909341SAndroid Build Coastguard Worker vslli.w vr6, vr4, 7 1496*c0909341SAndroid Build Coastguard Worker vslli.w vr7, vr5, 7 1497*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr8, vr1, 0 // dst 1498*c0909341SAndroid Build Coastguard Worker vexth.w.h vr9, vr1 // dst 
1499*c0909341SAndroid Build Coastguard Worker vsub.w vr8, vr8, vr4 1500*c0909341SAndroid Build Coastguard Worker vsub.w vr9, vr9, vr5 1501*c0909341SAndroid Build Coastguard Worker vmadd.w vr6, vr8, vr3 // v 0 - 3 1502*c0909341SAndroid Build Coastguard Worker vmadd.w vr7, vr9, vr3 // v 4 - 7 1503*c0909341SAndroid Build Coastguard Worker 1504*c0909341SAndroid Build Coastguard Worker vsllwil.w.h vr11, vr10, 0 // dst1 1505*c0909341SAndroid Build Coastguard Worker vexth.w.h vr12, vr10 // dst1 1506*c0909341SAndroid Build Coastguard Worker vsub.w vr11, vr11, vr4 1507*c0909341SAndroid Build Coastguard Worker vsub.w vr12, vr12, vr5 1508*c0909341SAndroid Build Coastguard Worker vmadd.w vr6, vr11, vr13 1509*c0909341SAndroid Build Coastguard Worker vmadd.w vr7, vr12, vr13 1510*c0909341SAndroid Build Coastguard Worker 1511*c0909341SAndroid Build Coastguard Worker vssrarni.hu.w vr7, vr6, 11 1512*c0909341SAndroid Build Coastguard Worker vssrlni.bu.h vr7, vr7, 0 1513*c0909341SAndroid Build Coastguard Worker addi.d t0, a0, 0 1514*c0909341SAndroid Build Coastguard Worker.LSGRMIX_REM_ST: 1515*c0909341SAndroid Build Coastguard Worker vstelm.b vr7, t0, 0, 0 1516*c0909341SAndroid Build Coastguard Worker addi.d t0, t0, 1 1517*c0909341SAndroid Build Coastguard Worker vbsrl.v vr7, vr7, 1 1518*c0909341SAndroid Build Coastguard Worker addi.w t4, t4, -1 1519*c0909341SAndroid Build Coastguard Worker bnez t4, .LSGRMIX_REM_ST 1520*c0909341SAndroid Build Coastguard Worker 1521*c0909341SAndroid Build Coastguard Worker addi.w a7, a7, -1 1522*c0909341SAndroid Build Coastguard Worker add.d a0, a0, a1 1523*c0909341SAndroid Build Coastguard Worker addi.d a2, a2, (FILTER_OUT_STRIDE<<1) 1524*c0909341SAndroid Build Coastguard Worker addi.d a3, a3, (FILTER_OUT_STRIDE<<1) 1525*c0909341SAndroid Build Coastguard Worker bnez a7, .LSGRMIX_REM 1526*c0909341SAndroid Build Coastguard Worker 1527*c0909341SAndroid Build Coastguard Worker.LSGR_MIX_END: 1528*c0909341SAndroid Build Coastguard Workerendfunc 

/*
 * MADD_HU_BU_LASX in0, in1, out0, out1
 *
 * Zero-extends the bytes of \in0 to 16 bit (low half of each 128-bit lane
 * into xr12, high half into xr13) and multiply-accumulates them with the
 * 16-bit lanes of \in1:
 *     \out0 += xr12 * \in1
 *     \out1 += xr13 * \in1
 * Clobbers xr12 and xr13.
 */
.macro MADD_HU_BU_LASX in0, in1, out0, out1
    xvsllwil.hu.bu  xr12,   \in0,   0
    xvexth.hu.bu    xr13,   \in0
    xvmadd.h        \out0,  xr12,   \in1
    xvmadd.h        \out1,  xr13,   \in1
.endm

// Byte indices 3..18, once per 128-bit lane.  wiener_filter_h_8bpc_lasx loads
// this as its xvshuf.b control and derives the other six tap offsets from it
// by adding/subtracting small constants.
const wiener_shuf_lasx
.byte 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
.byte 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
endconst

/*
 * Horizontal 7-tap pass, LASX version.
 *
 * Register roles, as annotated on the instructions below:
 *   a0 = hor_ptr  (output, 32-bit values)
 *   a1 = tmp_ptr  (input, 8-bit pixels)
 *   a2 = filter[0][k]  (16-bit coefficients, k = 0..6)
 *   a3 = w,  a4 = h
 *
 * For each group of 32 input pixels the code computes, per output x:
 *   sum = (1 << 14) + (px[x + 3] << 7) + sum over k of filter[k] * px[x + k]
 *   out = clip((sum rounding-shifted right by 3), 0, ((1 << 14) >> 1) - 1)
 * The seven taps are accumulated in 16-bit lanes (xvmadd.h) and only then
 * sign-extended to 32 bit; the px[x + 3] << 7 term is added in 32 bit.
 *
 * Each inner iteration consumes 32 pixels and writes 128 bytes; the inner
 * loop runs while the remaining width is > 0, so the width is effectively
 * rounded up to a multiple of 32.  Rows advance by REST_UNIT_STRIDE bytes of
 * input and REST_UNIT_STRIDE << 2 bytes of output.
 *
 * NOTE(review): f24..f28 are spilled to a 40-byte frame.  xr24..xr27 are
 * written below; xr28 is not referenced anywhere in this routine.  The frame
 * size is not a multiple of 16 -- confirm against the ABI's stack-alignment
 * rule (the routine makes no calls).
 */
function wiener_filter_h_8bpc_lasx
    addi.d          sp,     sp,     -40
    fst.d           f24,    sp,     0
    fst.d           f25,    sp,     8
    fst.d           f26,    sp,     16
    fst.d           f27,    sp,     24
    fst.d           f28,    sp,     32
    li.w            t7,     1<<14           // bias added to every sum; the clip bound is derived from it below

    la.local        t1,     wiener_shuf_lasx
    xvld            xr4,    t1,     0       // shuffle control: indices 3..18 per lane
    vld             vr27,   a2,     0       // filter[0][k]
    xvpermi.q       xr14,   xr27,   0b00000000     // coefficients into both 128-bit lanes
    xvrepl128vei.h  xr21,   xr14,   0       // xr21..xr27 = filter[0..6], one tap per register
    xvrepl128vei.h  xr22,   xr14,   1
    xvrepl128vei.h  xr23,   xr14,   2
    xvrepl128vei.h  xr24,   xr14,   3
    xvrepl128vei.h  xr25,   xr14,   4
    xvrepl128vei.h  xr26,   xr14,   5
    xvrepl128vei.h  xr27,   xr14,   6
    xvreplgr2vr.w   xr0,    t7

.WIENER_FILTER_H_H_LASX:
    addi.w          a4,     a4,     -1      // h
    addi.w          t0,     a3,     0       // w
    addi.d          t1,     a1,     0       // tmp_ptr
    addi.d          t2,     a0,     0       // hor_ptr

.WIENER_FILTER_H_W_LASX:
    addi.w          t0,     t0,     -32
    xvld            xr5,    t1,     0       // pixels 0..31
    xvld            xr13,   t1,     16      // pixels 16..47 (source of the bytes past each lane)

    xvsubi.bu       xr14,   xr4,    2
    xvsubi.bu       xr15,   xr4,    1
    xvshuf.b        xr6,    xr13,   xr5,    xr14   // 1 ... 8, 9 ... 16
    xvshuf.b        xr7,    xr13,   xr5,    xr15   // 2 ... 9, 10 ... 17
    xvshuf.b        xr8,    xr13,   xr5,    xr4    // 3 ... 10, 11 ... 18
    xvaddi.bu       xr14,   xr4,    1
    xvaddi.bu       xr15,   xr4,    2
    xvshuf.b        xr9,    xr13,   xr5,    xr14   // 4 ... 11, 12 ... 19
    xvshuf.b        xr10,   xr13,   xr5,    xr15   // 5 ... 12, 13 ... 20
    xvaddi.bu       xr14,   xr4,    3
    xvshuf.b        xr11,   xr13,   xr5,    xr14   // 6 ... 13, 14 ... 21

    // Centre pixel (offset 3) widened to 32 bit and shifted left by 7.
    xvsllwil.hu.bu  xr15,   xr8,    0       // 3 4 5 6 7 8 9 10
    xvexth.hu.bu    xr16,   xr8             // 11 12 13 14 15 16 17 18
    xvsllwil.wu.hu  xr17,   xr15,   7       // 3 4 5 6
    xvexth.wu.hu    xr18,   xr15            // 7 8 9 10
    xvsllwil.wu.hu  xr19,   xr16,   7       // 11 12 13 14
    xvexth.wu.hu    xr20,   xr16            // 15 16 17 18
    xvslli.w        xr18,   xr18,   7
    xvslli.w        xr20,   xr20,   7
    xvxor.v         xr15,   xr15,   xr15    // clear the two 16-bit tap accumulators
    xvxor.v         xr14,   xr14,   xr14

    MADD_HU_BU_LASX xr5,    xr21,   xr14,   xr15
    MADD_HU_BU_LASX xr6,    xr22,   xr14,   xr15
    MADD_HU_BU_LASX xr7,    xr23,   xr14,   xr15
    MADD_HU_BU_LASX xr8,    xr24,   xr14,   xr15
    MADD_HU_BU_LASX xr9,    xr25,   xr14,   xr15
    MADD_HU_BU_LASX xr10,   xr26,   xr14,   xr15
    MADD_HU_BU_LASX xr11,   xr27,   xr14,   xr15

    xvsllwil.w.h    xr5,    xr14,   0       // 0 1 2 3
    xvexth.w.h      xr6,    xr14            // 4 5 6 7
    xvsllwil.w.h    xr7,    xr15,   0       // 8 9 10 11
    xvexth.w.h      xr8,    xr15            // 12 13 14 15
    xvadd.w         xr17,   xr17,   xr5     // centre term + tap sum
    xvadd.w         xr18,   xr18,   xr6
    xvadd.w         xr19,   xr19,   xr7
    xvadd.w         xr20,   xr20,   xr8
    xvadd.w         xr17,   xr17,   xr0     // + (1 << 14)
    xvadd.w         xr18,   xr18,   xr0
    xvadd.w         xr19,   xr19,   xr0
    xvadd.w         xr20,   xr20,   xr0

    xvsrli.w        xr1,    xr0,    1
    xvsubi.wu       xr1,    xr1,    1       // upper clip bound: ((1 << 14) >> 1) - 1
    xvxor.v         xr3,    xr3,    xr3     // lower clip bound: 0
    xvsrari.w       xr17,   xr17,   3       // rounding arithmetic shift right by 3
    xvsrari.w       xr18,   xr18,   3
    xvsrari.w       xr19,   xr19,   3
    xvsrari.w       xr20,   xr20,   3
    xvclip.w        xr17,   xr17,   xr3,    xr1
    xvclip.w        xr18,   xr18,   xr3,    xr1
    xvclip.w        xr19,   xr19,   xr3,    xr1
    xvclip.w        xr20,   xr20,   xr3,    xr1

    // Exchange 128-bit halves between the register pairs before storing.
    // NOTE(review): presumably this puts the 32 results into pixel order
    // across the four stores -- confirm against the xvpermi.q selector encoding.
    xvor.v          xr5,    xr17,   xr17    // keep copies; xr17/xr19 are overwritten next
    xvor.v          xr6,    xr19,   xr19
    xvpermi.q       xr17,   xr18,   0b00000010
    xvpermi.q       xr19,   xr20,   0b00000010

    xvst            xr17,   t2,     0
    xvst            xr19,   t2,     32
    xvpermi.q       xr18,   xr5,    0b00110001
    xvpermi.q       xr20,   xr6,    0b00110001
    xvst            xr18,   t2,     64
    xvst            xr20,   t2,     96
    addi.d          t1,     t1,     32
    addi.d          t2,     t2,     128
    blt             zero,   t0,     .WIENER_FILTER_H_W_LASX

    addi.d          a1,     a1,     REST_UNIT_STRIDE
    addi.d          a0,     a0,     (REST_UNIT_STRIDE << 2)
    bnez            a4,     .WIENER_FILTER_H_H_LASX

    fld.d           f24,    sp,     0
    fld.d           f25,    sp,     8
    fld.d           f26,    sp,     16
    fld.d           f27,    sp,     24
    fld.d           f28,    sp,     32
    addi.d          sp,     sp,     40
endfunc

/*
 * APPLY_FILTER_LASX in0, in1, in2
 *
 * One tap of the vertical filter for 16 columns:
 *     t7    = \in1 + (\in0 << 2)
 *     xr14 += (eight 32-bit words at t7)      * \in2
 *     xr16 += (eight 32-bit words at t7 + 32) * \in2
 * Clobbers t7, xr10 and xr12.  Because t7 is also the result register,
 * passing t7 as \in1 steps the address forward by \in0 words.
 */
.macro APPLY_FILTER_LASX in0, in1, in2
    alsl.d          t7,     \in0,   \in1,   2
    xvld            xr10,   t7,     0
    xvld            xr12,   t7,     32
    xvmadd.w        xr14,   xr10,   \in2
    xvmadd.w        xr16,   xr12,   \in2
.endm

/*
 * Core of the vertical 7-tap pass for 16 columns.
 *
 * Expects: t6 = accumulator start value, t8 = REST_UNIT_STRIDE, t2 = row j,
 * t4 = column i, a2 = base of the 32-bit input, xr2..xr8 = the seven
 * coefficients (all set up by wiener_filter_v_8bpc_lasx).
 * Accumulates seven consecutive rows starting at row j, column i, then
 * rounding-shifts right by 11 and saturates down to bytes.  The caller stores
 * the result through vr17, i.e. from the low 128 bits of xr17.
 * Clobbers t7, xr10, xr12, xr14, xr16, xr17.
 */
.macro wiener_filter_v_8bpc_core_lasx
    xvreplgr2vr.w   xr14,   t6
    xvreplgr2vr.w   xr16,   t6

    addi.w          t7,     t2,     0       // j + index k
    mul.w           t7,     t7,     t8      // (j + index) * REST_UNIT_STRIDE
    add.w           t7,     t7,     t4      // (j + index) * REST_UNIT_STRIDE + i

    APPLY_FILTER_LASX t7,   a2,     xr2     // first row: a2 + 4 * index
    APPLY_FILTER_LASX t8,   t7,     xr3     // each further tap: one row (t8 words) down
    APPLY_FILTER_LASX t8,   t7,     xr4
    APPLY_FILTER_LASX t8,   t7,     xr5
    APPLY_FILTER_LASX t8,   t7,     xr6
    APPLY_FILTER_LASX t8,   t7,     xr7
    APPLY_FILTER_LASX t8,   t7,     xr8
    xvssrarni.hu.w  xr16,   xr14,   11      // rounding >> 11, saturate, narrow to 16 bit
    xvpermi.d       xr17,   xr16,   0b11011000
    xvssrlni.bu.h   xr17,   xr17,   0       // saturate and narrow to 8 bit
    xvpermi.d       xr17,   xr17,   0b00001000
.endm

/*
 * Vertical 7-tap pass, LASX version.
 *
 * Register roles as used below:
 *   a0 = p       output pixels (bytes)
 *   a1 = stride  output row stride in bytes ("j * stride")
 *   a2 = base of the 32-bit input rows (REST_UNIT_STRIDE words per row)
 *   a3 = seven 16-bit coefficients, read at byte offsets 0..12
 *   a4 = w
 *   a5 = row count: the row index j runs from 0 while j < a5
 *
 * Columns are handled 16 at a time with the LASX core; the w % 16 leftover
 * columns (and whole rows when w < 16) use wiener_filter_v_8bpc_core_lsx,
 * which is defined elsewhere in this file, and are stored byte by byte out
 * of vr17.
 * NOTE(review): this assumes the LSX core leaves its result in vr17 and
 * reads the same t2/t4/t6/t8/a2 inputs -- confirm against that macro.
 */
function wiener_filter_v_8bpc_lasx
    li.w            t6,     -(1 << 18)      // start value of every accumulator

    li.w            t8,     REST_UNIT_STRIDE
    ld.h            t0,     a3,     0
    ld.h            t1,     a3,     2
    xvreplgr2vr.w   xr2,    t0
    xvreplgr2vr.w   xr3,    t1
    ld.h            t0,     a3,     4
    ld.h            t1,     a3,     6
    xvreplgr2vr.w   xr4,    t0
    xvreplgr2vr.w   xr5,    t1
    ld.h            t0,     a3,     8
    ld.h            t1,     a3,     10
    xvreplgr2vr.w   xr6,    t0
    xvreplgr2vr.w   xr7,    t1
    ld.h            t0,     a3,     12
    xvreplgr2vr.w   xr8,    t0

    andi            t1,     a4,     0xf     // w % 16
    sub.w           t0,     a4,     t1      // w-w%16
    or              t2,     zero,   zero    // j
    or              t4,     zero,   zero    // i = 0 (used as-is by the w < 16 path)
    beqz            t0,     .WIENER_FILTER_V_W_LT16_LASX

.WIENER_FILTER_V_H_LASX:
    andi            t1,     a4,     0xf     // reload: the byte-store loop counts t1 down
    add.d           t3,     zero,   a0      // p
    or              t4,     zero,   zero    // i

.WIENER_FILTER_V_W_LASX:

    wiener_filter_v_8bpc_core_lasx

    mul.w           t5,     t2,     a1      // j * stride
    add.w           t5,     t5,     t4      // j * stride + i
    add.d           t3,     a0,     t5
    addi.w          t4,     t4,     16
    vst             vr17,   t3,     0       // 16 output pixels
    bne             t0,     t4,     .WIENER_FILTER_V_W_LASX

    beqz            t1,     .WIENER_FILTER_V_W_EQ16_LASX   // w is a multiple of 16

    wiener_filter_v_8bpc_core_lsx

    addi.d          t3,     t3,     16      // just past the last full 16-pixel store
    andi            t1,     a4,     0xf

.WIENER_FILTER_V_ST_REM_LASX:               // store the w % 16 leftover bytes
    vstelm.b        vr17,   t3,     0,  0
    vbsrl.v         vr17,   vr17,   1
    addi.d          t3,     t3,     1
    addi.w          t1,     t1,     -1
    bnez            t1,     .WIENER_FILTER_V_ST_REM_LASX
.WIENER_FILTER_V_W_EQ16_LASX:
    addi.w          t2,     t2,     1
    blt             t2,     a5,     .WIENER_FILTER_V_H_LASX
    b               .WIENER_FILTER_V_LASX_END

.WIENER_FILTER_V_W_LT16_LASX:               // one row, w < 16
    andi            t1,     a4,     0xf
    add.d           t3,     zero,   a0

    wiener_filter_v_8bpc_core_lsx

    mul.w           t5,     t2,     a1      // j * stride
    add.d           t3,     a0,     t5

.WIENER_FILTER_V_ST_REM_1_LASX:
    vstelm.b        vr17,   t3,     0,  0
    vbsrl.v         vr17,   vr17,   1
    addi.d          t3,     t3,     1
    addi.w          t1,     t1,     -1
    bnez            t1,     .WIENER_FILTER_V_ST_REM_1_LASX

    addi.w          t2,     t2,     1
    blt             t2,     a5,     .WIENER_FILTER_V_W_LT16_LASX

.WIENER_FILTER_V_LASX_END:
endfunc

/*
 * In-place update of the AA / BB planes, LASX version.
 *
 * Register roles as used below:
 *   a0 = AA  (32-bit entries, REST_UNIT_STRIDE per row)
 *   a1 = BB  (16-bit entries, REST_UNIT_STRIDE per row)
 *   a2 = w,  a3 = h   (both are increased by 2 before the loops)
 *   a4 = s   (multiplier in "p * s")
 *
 * For 16 entries per inner iteration:
 *   p     = max(AA[i] * n - BB[i] * BB[i], 0)
 *   z     = min((p * s) rounding-shifted right by 20, 255)
 *   x     = dav1d_sgr_x_by_x[z]        (scalar byte loads, one per lane)
 *   AA[i] = (x * BB[i] * 455) rounding-shifted right by 12
 *   BB[i] = 256 - x
 * The inner loop runs while the remaining width is > 0, so the width is
 * effectively rounded up to a multiple of 16.
 *
 * NOTE(review): n is whatever `xvldi xr19, 0x809` materialises in each
 * 32-bit lane -- presumably 9; confirm against the xvldi immediate encoding.
 */
function boxsum3_sgf_h_8bpc_lasx
    // Skip one row plus three entries in each plane.
    addi.d          a0,     a0,     (REST_UNIT_STRIDE<<2)+12   // AA
    addi.d          a1,     a1,     (REST_UNIT_STRIDE<<1)+6    // BB
    la.local        t8,     dav1d_sgr_x_by_x
    li.w            t6,     455
    xvreplgr2vr.w   xr20,   t6              // sgr_one_by_x factor (see the multiply below)
    li.w            t6,     255
    xvreplgr2vr.w   xr22,   t6              // upper bound for the table index
    xvaddi.wu       xr21,   xr22,   1       // 256
    xvreplgr2vr.w   xr6,    a4              // s
    xvldi           xr19,   0x809           // n
    addi.w          a2,     a2,     2       // w + 2
    addi.w          a3,     a3,     2       // h + 2

.LBS3SGF_H_H_LASX:
    addi.w          t2,     a2,     0
    addi.d          t0,     a0,     -4      // start one entry to the left
    addi.d          t1,     a1,     -2

.LBS3SGF_H_W_LASX:
    addi.w          t2,     t2,     -16
    xvld            xr0,    t0,     0       // AA[i]
    xvld            xr1,    t0,     32
    xvld            xr2,    t1,     0       // BB[i]

    xvmul.w         xr4,    xr0,    xr19    // a * n
    xvmul.w         xr5,    xr1,    xr19
    vext2xv.w.h     xr9,    xr2             // BB, first eight entries, widened to 32 bit
    xvpermi.q       xr10,   xr2,    0b00000001
    vext2xv.w.h     xr10,   xr10            // BB, last eight entries, widened to 32 bit
    xvmsub.w        xr4,    xr9,    xr9     // p
    xvmsub.w        xr5,    xr10,   xr10
    xvmaxi.w        xr4,    xr4,    0
    xvmaxi.w        xr5,    xr5,    0
    xvmul.w         xr4,    xr4,    xr6     // p * s
    xvmul.w         xr5,    xr5,    xr6
    xvsrlri.w       xr4,    xr4,    20
    xvsrlri.w       xr5,    xr5,    20
    xvmin.w         xr4,    xr4,    xr22
    xvmin.w         xr5,    xr5,    xr22

    // Table lookup for each of the 16 indices: move the index to a GPR, load
    // one byte from dav1d_sgr_x_by_x, insert it into xr7 (first eight) or
    // xr8 (last eight).
    vpickve2gr.w    t6,     vr4,    0
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr7,    t7,     0
    vpickve2gr.w    t6,     vr4,    1
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr7,    t7,     1
    vpickve2gr.w    t6,     vr4,    2
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr7,    t7,     2
    vpickve2gr.w    t6,     vr4,    3
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr7,    t7,     3

    xvpickve2gr.w   t6,     xr4,    4
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr7,    t7,     4
    xvpickve2gr.w   t6,     xr4,    5
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr7,    t7,     5
    xvpickve2gr.w   t6,     xr4,    6
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr7,    t7,     6
    xvpickve2gr.w   t6,     xr4,    7
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr7,    t7,     7       // x

    vpickve2gr.w    t6,     vr5,    0
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr8,    t7,     0
    vpickve2gr.w    t6,     vr5,    1
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr8,    t7,     1
    vpickve2gr.w    t6,     vr5,    2
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr8,    t7,     2
    vpickve2gr.w    t6,     vr5,    3
    ldx.bu          t7,     t8,     t6
    vinsgr2vr.w     vr8,    t7,     3

    xvpickve2gr.w   t6,     xr5,    4
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr8,    t7,     4
    xvpickve2gr.w   t6,     xr5,    5
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr8,    t7,     5
    xvpickve2gr.w   t6,     xr5,    6
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr8,    t7,     6
    xvpickve2gr.w   t6,     xr5,    7
    ldx.bu          t7,     t8,     t6
    xvinsgr2vr.w    xr8,    t7,     7       // x

    xvmul.w         xr9,    xr7,    xr9     // x * BB[i]
    xvmul.w         xr10,   xr8,    xr10
    xvmul.w         xr9,    xr9,    xr20    // x * BB[i] * sgr_one_by_x
    xvmul.w         xr10,   xr10,   xr20
    xvsrlri.w       xr9,    xr9,    12
    xvsrlri.w       xr10,   xr10,   12
    xvsub.w         xr7,    xr21,   xr7     // 256 - x
    xvsub.w         xr8,    xr21,   xr8
    xvpickev.h      xr12,   xr8,    xr7     // keep the low 16 bits of each 32-bit lane
    xvpermi.d       xr11,   xr12,   0b11011000

    xvst            xr9,    t0,     0       // new AA, first eight entries
    xvst            xr10,   t0,     32      // new AA, last eight entries
    xvst            xr11,   t1,     0       // new BB, sixteen 16-bit entries
    addi.d          t0,     t0,     64
    addi.d          t1,     t1,     32
    blt             zero,   t2,     .LBS3SGF_H_W_LASX

    addi.d          a0,     a0,     REST_UNIT_STRIDE<<2
    addi.d          a1,     a1,     REST_UNIT_STRIDE<<1
    addi.w          a3,     a3,     -1
    bnez            a3,     .LBS3SGF_H_H_LASX
endfunc

// (The line below is the untouched beginning of boxsum3_h_8bpc_lasx, which
//  continues past this section of the file.)
1886*c0909341SAndroid Build Coastguard Worker 1887*c0909341SAndroid Build Coastguard Workerfunction boxsum3_h_8bpc_lasx 1888*c0909341SAndroid Build Coastguard Worker addi.d a2, a2, REST_UNIT_STRIDE 1889*c0909341SAndroid Build Coastguard Worker li.w t0, 1 1890*c0909341SAndroid Build Coastguard Worker addi.w a3, a3, -2 1891*c0909341SAndroid Build Coastguard Worker addi.w a4, a4, -4 1892*c0909341SAndroid Build Coastguard Worker.LBS3_H_H_LASX: 1893*c0909341SAndroid Build Coastguard Worker alsl.d t1, t0, a1, 1 // sum_v *sum_v = sum + x 1894*c0909341SAndroid Build Coastguard Worker alsl.d t2, t0, a0, 2 // sumsq_v *sumsq_v = sumsq + x 1895*c0909341SAndroid Build Coastguard Worker add.d t3, t0, a2 // s 1896*c0909341SAndroid Build Coastguard Worker addi.w t5, a3, 0 1897*c0909341SAndroid Build Coastguard Worker 1898*c0909341SAndroid Build Coastguard Worker.LBS3_H_W_LASX: 1899*c0909341SAndroid Build Coastguard Worker xvld xr0, t3, 0 1900*c0909341SAndroid Build Coastguard Worker xvld xr1, t3, REST_UNIT_STRIDE 1901*c0909341SAndroid Build Coastguard Worker xvld xr2, t3, (REST_UNIT_STRIDE<<1) 1902*c0909341SAndroid Build Coastguard Worker 1903*c0909341SAndroid Build Coastguard Worker xvilvl.b xr3, xr1, xr0 1904*c0909341SAndroid Build Coastguard Worker xvhaddw.hu.bu xr4, xr3, xr3 1905*c0909341SAndroid Build Coastguard Worker xvilvh.b xr5, xr1, xr0 1906*c0909341SAndroid Build Coastguard Worker xvhaddw.hu.bu xr6, xr5, xr5 1907*c0909341SAndroid
Build Coastguard Worker xvsllwil.hu.bu xr7, xr2, 0 1908*c0909341SAndroid Build Coastguard Worker xvexth.hu.bu xr8, xr2 1909*c0909341SAndroid Build Coastguard Worker // sum_v 1910*c0909341SAndroid Build Coastguard Worker xvadd.h xr4, xr4, xr7 // 0 2 1911*c0909341SAndroid Build Coastguard Worker xvadd.h xr6, xr6, xr8 // 1 3 1912*c0909341SAndroid Build Coastguard Worker xvor.v xr9, xr4, xr4 1913*c0909341SAndroid Build Coastguard Worker xvpermi.q xr4, xr6, 0b00000010 1914*c0909341SAndroid Build Coastguard Worker xvpermi.q xr6, xr9, 0b00110001 1915*c0909341SAndroid Build Coastguard Worker xvst xr4, t1, REST_UNIT_STRIDE<<1 1916*c0909341SAndroid Build Coastguard Worker xvst xr6, t1, (REST_UNIT_STRIDE<<1)+32 1917*c0909341SAndroid Build Coastguard Worker addi.d t1, t1, 64 1918*c0909341SAndroid Build Coastguard Worker // sumsq 1919*c0909341SAndroid Build Coastguard Worker xvmulwev.h.bu xr9, xr3, xr3 1920*c0909341SAndroid Build Coastguard Worker xvmulwod.h.bu xr10, xr3, xr3 1921*c0909341SAndroid Build Coastguard Worker xvmulwev.h.bu xr11, xr5, xr5 1922*c0909341SAndroid Build Coastguard Worker xvmulwod.h.bu xr12, xr5, xr5 1923*c0909341SAndroid Build Coastguard Worker xvaddwev.w.hu xr13, xr10, xr9 1924*c0909341SAndroid Build Coastguard Worker xvaddwod.w.hu xr14, xr10, xr9 1925*c0909341SAndroid Build Coastguard Worker xvaddwev.w.hu xr15, xr12, xr11 1926*c0909341SAndroid Build Coastguard Worker xvaddwod.w.hu xr16, xr12, xr11 1927*c0909341SAndroid Build Coastguard Worker xvmaddwev.w.hu xr13, xr7, xr7 1928*c0909341SAndroid Build Coastguard Worker xvmaddwod.w.hu xr14, xr7, xr7 1929*c0909341SAndroid Build Coastguard Worker xvmaddwev.w.hu xr15, xr8, xr8 1930*c0909341SAndroid Build Coastguard Worker xvmaddwod.w.hu xr16, xr8, xr8 1931*c0909341SAndroid Build Coastguard Worker xvilvl.w xr9, xr14, xr13 1932*c0909341SAndroid Build Coastguard Worker xvilvh.w xr10, xr14, xr13 1933*c0909341SAndroid Build Coastguard Worker xvilvl.w xr11, xr16, xr15 1934*c0909341SAndroid Build Coastguard Worker 
xvilvh.w xr12, xr16, xr15 1935*c0909341SAndroid Build Coastguard Worker xvor.v xr7, xr9, xr9 1936*c0909341SAndroid Build Coastguard Worker xvor.v xr8, xr11, xr11 1937*c0909341SAndroid Build Coastguard Worker xvpermi.q xr9, xr10, 0b00000010 1938*c0909341SAndroid Build Coastguard Worker xvpermi.q xr10, xr7, 0b00110001 1939*c0909341SAndroid Build Coastguard Worker xvpermi.q xr11, xr12, 0b00000010 1940*c0909341SAndroid Build Coastguard Worker xvpermi.q xr12, xr8, 0b00110001 1941*c0909341SAndroid Build Coastguard Worker xvst xr9, t2, REST_UNIT_STRIDE<<2 1942*c0909341SAndroid Build Coastguard Worker xvst xr11, t2, (REST_UNIT_STRIDE<<2)+32 1943*c0909341SAndroid Build Coastguard Worker xvst xr10, t2, (REST_UNIT_STRIDE<<2)+64 1944*c0909341SAndroid Build Coastguard Worker xvst xr12, t2, (REST_UNIT_STRIDE<<2)+96 1945*c0909341SAndroid Build Coastguard Worker 1946*c0909341SAndroid Build Coastguard Worker addi.d t2, t2, 128 1947*c0909341SAndroid Build Coastguard Worker addi.w t5, t5, -32 1948*c0909341SAndroid Build Coastguard Worker addi.d t3, t3, 32 1949*c0909341SAndroid Build Coastguard Worker blt zero, t5, .LBS3_H_W_LASX 1950*c0909341SAndroid Build Coastguard Worker 1951*c0909341SAndroid Build Coastguard Worker addi.d a0, a0, REST_UNIT_STRIDE<<2 1952*c0909341SAndroid Build Coastguard Worker addi.d a1, a1, REST_UNIT_STRIDE<<1 1953*c0909341SAndroid Build Coastguard Worker addi.d a2, a2, REST_UNIT_STRIDE 1954*c0909341SAndroid Build Coastguard Worker addi.d a4, a4, -1 1955*c0909341SAndroid Build Coastguard Worker blt zero, a4, .LBS3_H_H_LASX 1956*c0909341SAndroid Build Coastguard Workerendfunc 1957