/*
 * Copyright © 2024, Arm Limited
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// Bias constants written as an `imm, lsl #8` operand pair so they can be pasted
// after a `#` in an immediate instruction (e.g. `sub z0.h, z0.h, #PREP_BIAS`).
#define PREP_BIAS 32, lsl #8        // 8192
#define PREP_BIAS_NEG 224, lsl #8   // -8192

#if HAVE_SVE2
ENABLE_SVE
ENABLE_SVE2

// No spaces in these expressions, due to gas-preprocessor. It is translated by
// -1 to save the negative offset when getting the address of `mc_subpel_filters`.
// Each selector packs two 7-bit filter-row offsets as (hi << 7) | lo. The
// 8-tap code adds the subpel position to both fields at once
// (`madd ..., w11, ...` with w11 = 0x4081), splits them again with
// `ubfx ..., #7, #7` / `and ..., #0x7F`, and keeps the low field when the
// compared block dimension is <= 4, otherwise the high field.
#define REGULAR1        (((0*15-1)<<7)|(3*15-1))
#define SMOOTH1         (((1*15-1)<<7)|(4*15-1))
#define SHARP1          (((2*15-1)<<7)|(3*15-1))

// Operands for `function ..., align=` (FUNC_ALIGN) and for the `.align`
// directives placed before branch targets (JUMP_ALIGN) and loops (LOOP_ALIGN).
#define FUNC_ALIGN      2
#define JUMP_ALIGN      2
#define LOOP_ALIGN      2


// Shuffle indices to permute horizontal samples in preparation for input to
// 16-bit SDOT instructions. The 8-tap horizontal convolution uses sample
// indices in the interval of [-3, 4] relative to the current sample position.
//
// Each 8-byte group selects four consecutive 16-bit samples; the four groups
// start at sample 0, 1, 2 and 3 respectively. The horizontal path loads the
// two rows as a register pair (`ldp q30, q31, [x13]`).
const h_tbl_sve, align=4
        .byte  0,  1,  2,  3,  4,  5,  6,  7,   2,  3,  4,  5,  6,  7,  8,  9
        .byte  4,  5,  6,  7,  8,  9, 10, 11,   6,  7,  8,  9, 10, 11, 12, 13
endconst

// Vertical convolutions also use 16-bit SDOT instructions, where two 128-bit
// registers contain a transposed 4x4 matrix of values. Subsequent iterations
// of the vertical convolution can reuse the 3x4 sub-matrix from the previous
// loop iteration. These shuffle indices shift and merge this 4x4 matrix with
// the values of a new line.
const v_tbl_sve, align=4
        // Indices 0-15 address the first TBL source register, 16-31 the second.
        // Row 0 (loaded into v6): drop the oldest 16-bit element of each 64-bit
        // half of the first source and append bytes 16-17 / 24-25 of the second.
        .byte  2,  3,  4,  5,  6,  7, 16, 17,  10, 11, 12, 13, 14, 15, 24, 25
        // Rows 1-4 (loaded into v28, v29, v30, v31): same shift, but append the
        // 16-bit element pairs (0,1), (2,3), (4,5) and (6,7) of the second
        // source, i.e. the samples of the newly loaded line.
        .byte  2,  3,  4,  5,  6,  7, 16, 17,  10, 11, 12, 13, 14, 15, 18, 19
        .byte  2,  3,  4,  5,  6,  7, 20, 21,  10, 11, 12, 13, 14, 15, 22, 23
        .byte  2,  3,  4,  5,  6,  7, 24, 25,  10, 11, 12, 13, 14, 15, 26, 27
        .byte  2,  3,  4,  5,  6,  7, 28, 29,  10, 11, 12, 13, 14, 15, 30, 31
endconst


// Emit the exported entry point `<op>_8tap_<type>_16bpc_<isa>`.
//
//   op      - operation prefix of the shared implementation (`<op>_8tap_<isa>`)
//   type    - filter-combination name used in the exported symbol
//   type_h  - packed horizontal filter selector, placed in x9
//   type_v  - packed vertical filter selector, placed in x10
//   isa     - instruction-set suffix of the symbol names
//   jump    - when non-zero (default) branch to `<op>_8tap_<isa>`; when zero no
//             branch is emitted, so the entry point must be placed directly in
//             front of that function and execution falls through into it.
.macro make_8tap_fn op, type, type_h, type_v, isa, jump=1
function \op\()_8tap_\type\()_16bpc_\isa, export=1, align=FUNC_ALIGN
        mov             x9,  \type_h
        mov             x10, \type_v
    .if \jump
        b               \op\()_8tap_\isa
    .endif
endfunc
.endm

80*c0909341SAndroid Build Coastguard Worker.macro filter_8tap_fn type, isa, dst, d_strd, src, s_strd, w, h, mx, my, bdmax, xmx, xmy, ldst, lsrc, wd_strd, ws_strd 81*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp, SHARP1, SHARP1, \isa 82*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp_smooth, SHARP1, SMOOTH1, \isa 83*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, sharp_regular, SHARP1, REGULAR1, \isa 84*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth_sharp, SMOOTH1, SHARP1, \isa 85*c0909341SAndroid Build Coastguard Workermake_8tap_fn
\type, smooth, SMOOTH1, SMOOTH1, \isa 86*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, smooth_regular, SMOOTH1, REGULAR1, \isa 87*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular_sharp, REGULAR1, SHARP1, \isa 88*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular_smooth, REGULAR1, SMOOTH1, \isa 89*c0909341SAndroid Build Coastguard Workermake_8tap_fn \type, regular, REGULAR1, REGULAR1, \isa, jump=0 90*c0909341SAndroid Build Coastguard Worker 91*c0909341SAndroid Build Coastguard Workerfunction \type\()_8tap_\isa, align=FUNC_ALIGN 92*c0909341SAndroid Build Coastguard Worker clz w8, \w 93*c0909341SAndroid Build Coastguard Worker mov w11, #0x4081 // (1<<14) | (1<<7) | 1 94*c0909341SAndroid Build Coastguard Worker ptrue p0.b, vl16 95*c0909341SAndroid Build Coastguard Worker sub w8, w8, #24 // for jump tables 96*c0909341SAndroid Build Coastguard Worker movrel x12, X(mc_subpel_filters) 97*c0909341SAndroid Build Coastguard Worker cbnz \mx, L(\type\()_8tap_h_hv_\isa) 98*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 99*c0909341SAndroid Build Coastguard Worker cbz \my, prep_sve 100*c0909341SAndroid Build Coastguard Worker.else // put 101*c0909341SAndroid Build Coastguard Worker cbnz \my, L(\type\()_8tap_v_\isa) 102*c0909341SAndroid Build Coastguard Worker mov w9, w8 103*c0909341SAndroid Build Coastguard Worker b X(put_16bpc_neon) 104*c0909341SAndroid Build Coastguard Worker 105*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 106*c0909341SAndroid Build Coastguard Worker.endif 107*c0909341SAndroid Build Coastguard Worker 108*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_v_\isa): 109*c0909341SAndroid Build Coastguard Worker madd \my, \my, w11, w10 110*c0909341SAndroid Build Coastguard Worker movrel x13, v_tbl_sve 111*c0909341SAndroid Build Coastguard Worker.ifc \bdmax, w8 // put case, but skip 112*c0909341SAndroid Build Coastguard Worker ld1r {v5.8h}, [sp] // loading into w8 113*c0909341SAndroid 
Build Coastguard Worker.endif 114*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd // src - s_strd 115*c0909341SAndroid Build Coastguard Worker ubfx w11, \my, #7, #7 116*c0909341SAndroid Build Coastguard Worker and \my, \my, #0x7F 117*c0909341SAndroid Build Coastguard Worker ldr q6, [x13] 118*c0909341SAndroid Build Coastguard Worker cmp \h, #4 119*c0909341SAndroid Build Coastguard Worker csel \my, \my, w11, le 120*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 // src - 3 * s_strd 121*c0909341SAndroid Build Coastguard Worker add \xmy, x12, \xmy, lsl #3 // subpel V filter address 122*c0909341SAndroid Build Coastguard Worker ldp q28, q29, [x13, #16] 123*c0909341SAndroid Build Coastguard Worker ld1sb {z7.h}, p0/z, [\xmy] 124*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 125*c0909341SAndroid Build Coastguard Worker clz \bdmax, \bdmax 126*c0909341SAndroid Build Coastguard Worker sub \bdmax, \bdmax, #24 127*c0909341SAndroid Build Coastguard Worker dup v5.4s, \bdmax 128*c0909341SAndroid Build Coastguard Worker.endif 129*c0909341SAndroid Build Coastguard Worker cmp \w, #8 130*c0909341SAndroid Build Coastguard Worker b.lt 40f 131*c0909341SAndroid Build Coastguard Worker 132*c0909341SAndroid Build Coastguard Worker // .align JUMP_ALIGN // fallthrough 133*c0909341SAndroid Build Coastguard Worker80: // V - 8xN+ 134*c0909341SAndroid Build Coastguard Worker ldp q30, q31, [x13, #48] 135*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 136*c0909341SAndroid Build Coastguard Worker add \wd_strd, \w, \w // d_strd = 2 * w 137*c0909341SAndroid Build Coastguard Worker.endif 138*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 139*c0909341SAndroid Build Coastguard Worker81: 140*c0909341SAndroid Build Coastguard Worker add \lsrc, \src, \s_strd, lsl #1 141*c0909341SAndroid Build Coastguard Worker 142*c0909341SAndroid Build Coastguard Worker ldr q16, [\src] 143*c0909341SAndroid Build Coastguard Worker ldr q17, [\src, 
\s_strd] 144*c0909341SAndroid Build Coastguard Worker ldr q18, [\lsrc] 145*c0909341SAndroid Build Coastguard Worker ldr q19, [\lsrc, \s_strd] 146*c0909341SAndroid Build Coastguard Worker add \lsrc, \lsrc, \s_strd, lsl #1 147*c0909341SAndroid Build Coastguard Worker mov \ldst, \dst 148*c0909341SAndroid Build Coastguard Worker 149*c0909341SAndroid Build Coastguard Worker ldr q20, [\lsrc] 150*c0909341SAndroid Build Coastguard Worker ldr q21, [\lsrc, \s_strd] 151*c0909341SAndroid Build Coastguard Worker add \lsrc, \lsrc, \s_strd, lsl #1 152*c0909341SAndroid Build Coastguard Worker ldr q22, [\lsrc] 153*c0909341SAndroid Build Coastguard Worker ldr q23, [\lsrc, \s_strd] 154*c0909341SAndroid Build Coastguard Worker add \lsrc, \lsrc, \s_strd, lsl #1 155*c0909341SAndroid Build Coastguard Worker sub w8, \h, #1 156*c0909341SAndroid Build Coastguard Worker 157*c0909341SAndroid Build Coastguard Worker zip1 v0.8h, v16.8h, v17.8h 158*c0909341SAndroid Build Coastguard Worker zip2 v1.8h, v16.8h, v17.8h 159*c0909341SAndroid Build Coastguard Worker zip1 v2.8h, v18.8h, v19.8h 160*c0909341SAndroid Build Coastguard Worker zip2 v3.8h, v18.8h, v19.8h 161*c0909341SAndroid Build Coastguard Worker 162*c0909341SAndroid Build Coastguard Worker zip1 v18.8h, v20.8h, v21.8h 163*c0909341SAndroid Build Coastguard Worker zip2 v21.8h, v20.8h, v21.8h 164*c0909341SAndroid Build Coastguard Worker zip1 v24.8h, v22.8h, v23.8h 165*c0909341SAndroid Build Coastguard Worker zip2 v27.8h, v22.8h, v23.8h 166*c0909341SAndroid Build Coastguard Worker 167*c0909341SAndroid Build Coastguard Worker zip1 v16.4s, v0.4s, v2.4s 168*c0909341SAndroid Build Coastguard Worker zip2 v19.4s, v0.4s, v2.4s 169*c0909341SAndroid Build Coastguard Worker zip1 v22.4s, v1.4s, v3.4s 170*c0909341SAndroid Build Coastguard Worker zip2 v25.4s, v1.4s, v3.4s 171*c0909341SAndroid Build Coastguard Worker 172*c0909341SAndroid Build Coastguard Worker zip1 v17.4s, v18.4s, v24.4s 173*c0909341SAndroid Build Coastguard Worker zip2 v20.4s, v18.4s, 
v24.4s 174*c0909341SAndroid Build Coastguard Worker zip1 v23.4s, v21.4s, v27.4s 175*c0909341SAndroid Build Coastguard Worker zip2 v26.4s, v21.4s, v27.4s 176*c0909341SAndroid Build Coastguard Worker 177*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 178*c0909341SAndroid Build Coastguard Worker8: 179*c0909341SAndroid Build Coastguard Worker ld1 {v18.16b}, [\lsrc], \s_strd 180*c0909341SAndroid Build Coastguard Worker 181*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 182*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 183*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 184*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 185*c0909341SAndroid Build Coastguard Worker mov v21.16b, v18.16b 186*c0909341SAndroid Build Coastguard Worker mov v24.16b, v18.16b 187*c0909341SAndroid Build Coastguard Worker mov v27.16b, v18.16b 188*c0909341SAndroid Build Coastguard Worker 189*c0909341SAndroid Build Coastguard Worker sdot z0.d, z16.h, z7.h[0] 190*c0909341SAndroid Build Coastguard Worker tbl v16.16b, {v16.16b, v17.16b}, v6.16b 191*c0909341SAndroid Build Coastguard Worker sdot z1.d, z19.h, z7.h[0] 192*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v19.16b, v20.16b}, v6.16b 193*c0909341SAndroid Build Coastguard Worker sdot z2.d, z22.h, z7.h[0] 194*c0909341SAndroid Build Coastguard Worker tbl v22.16b, {v22.16b, v23.16b}, v6.16b 195*c0909341SAndroid Build Coastguard Worker subs w8, w8, #1 196*c0909341SAndroid Build Coastguard Worker sdot z3.d, z25.h, z7.h[0] 197*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v25.16b, v26.16b}, v6.16b 198*c0909341SAndroid Build Coastguard Worker 199*c0909341SAndroid Build Coastguard Worker sdot z0.d, z17.h, z7.h[1] 200*c0909341SAndroid Build Coastguard Worker tbl v17.16b, {v17.16b, v18.16b}, v28.16b 201*c0909341SAndroid Build Coastguard Worker sdot z1.d, z20.h, z7.h[1] 202*c0909341SAndroid Build Coastguard Worker tbl v20.16b, {v20.16b, v21.16b}, v29.16b 203*c0909341SAndroid Build Coastguard 
Worker sdot z2.d, z23.h, z7.h[1] 204*c0909341SAndroid Build Coastguard Worker tbl v23.16b, {v23.16b, v24.16b}, v30.16b 205*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z7.h[1] 206*c0909341SAndroid Build Coastguard Worker tbl v26.16b, {v26.16b, v27.16b}, v31.16b 207*c0909341SAndroid Build Coastguard Worker 208*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 209*c0909341SAndroid Build Coastguard Worker uzp1 v1.4s, v2.4s, v3.4s 210*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 211*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 212*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v5.4s 213*c0909341SAndroid Build Coastguard Worker uzp1 v0.8h, v0.8h, v1.8h 214*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 215*c0909341SAndroid Build Coastguard Worker.else // put 216*c0909341SAndroid Build Coastguard Worker sqrshrun v0.4h, v0.4s, #6 217*c0909341SAndroid Build Coastguard Worker sqrshrun2 v0.8h, v1.4s, #6 218*c0909341SAndroid Build Coastguard Worker umin v0.8h, v0.8h, v5.8h 219*c0909341SAndroid Build Coastguard Worker.endif 220*c0909341SAndroid Build Coastguard Worker st1 {v0.16b}, [\ldst], \d_strd 221*c0909341SAndroid Build Coastguard Worker b.gt 8b 222*c0909341SAndroid Build Coastguard Worker 223*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 224*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 225*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 226*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 227*c0909341SAndroid Build Coastguard Worker 228*c0909341SAndroid Build Coastguard Worker sdot z0.d, z16.h, z7.h[0] 229*c0909341SAndroid Build Coastguard Worker sdot z1.d, z19.h, z7.h[0] 230*c0909341SAndroid Build Coastguard Worker sdot z2.d, z22.h, z7.h[0] 231*c0909341SAndroid Build Coastguard Worker sdot z3.d, z25.h, z7.h[0] 232*c0909341SAndroid Build Coastguard Worker 233*c0909341SAndroid Build Coastguard Worker sdot z0.d, z17.h, z7.h[1] 
234*c0909341SAndroid Build Coastguard Worker sdot z1.d, z20.h, z7.h[1] 235*c0909341SAndroid Build Coastguard Worker sdot z2.d, z23.h, z7.h[1] 236*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z7.h[1] 237*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 238*c0909341SAndroid Build Coastguard Worker 239*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 240*c0909341SAndroid Build Coastguard Worker uzp1 v1.4s, v2.4s, v3.4s 241*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 242*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 243*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v5.4s 244*c0909341SAndroid Build Coastguard Worker uzp1 v0.8h, v0.8h, v1.8h 245*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 246*c0909341SAndroid Build Coastguard Worker.else // put 247*c0909341SAndroid Build Coastguard Worker sqrshrun v0.4h, v0.4s, #6 248*c0909341SAndroid Build Coastguard Worker sqrshrun2 v0.8h, v1.4s, #6 249*c0909341SAndroid Build Coastguard Worker umin v0.8h, v0.8h, v5.8h 250*c0909341SAndroid Build Coastguard Worker.endif 251*c0909341SAndroid Build Coastguard Worker str q0, [\ldst] 252*c0909341SAndroid Build Coastguard Worker 253*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 254*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 255*c0909341SAndroid Build Coastguard Worker b.gt 81b 256*c0909341SAndroid Build Coastguard Worker ret 257*c0909341SAndroid Build Coastguard Worker 258*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 259*c0909341SAndroid Build Coastguard Worker40: // V - 4xN, put only: 2xN 260*c0909341SAndroid Build Coastguard Worker.ifc \type, put 261*c0909341SAndroid Build Coastguard Worker lsr \d_strd, \d_strd, #1 // hword index for `st1h` 262*c0909341SAndroid Build Coastguard Worker whilelt p1.h, wzr, \w // masking for writes 263*c0909341SAndroid Build Coastguard Worker.endif 264*c0909341SAndroid Build Coastguard Worker cmp \h, #4 
265*c0909341SAndroid Build Coastguard Worker b.le 44f 266*c0909341SAndroid Build Coastguard Worker 267*c0909341SAndroid Build Coastguard Worker ldr d16, [\src] 268*c0909341SAndroid Build Coastguard Worker ldr d17, [\src, \s_strd] 269*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 270*c0909341SAndroid Build Coastguard Worker ldr d18, [\src] 271*c0909341SAndroid Build Coastguard Worker ldr d19, [\src, \s_strd] 272*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 273*c0909341SAndroid Build Coastguard Worker 274*c0909341SAndroid Build Coastguard Worker ldr d20, [\src] 275*c0909341SAndroid Build Coastguard Worker ldr d21, [\src, \s_strd] 276*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 277*c0909341SAndroid Build Coastguard Worker ldr d22, [\src] 278*c0909341SAndroid Build Coastguard Worker ldr d23, [\src, \s_strd] 279*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 280*c0909341SAndroid Build Coastguard Worker sub \h, \h, #2 281*c0909341SAndroid Build Coastguard Worker 282*c0909341SAndroid Build Coastguard Worker zip1 v0.8h, v16.8h, v17.8h 283*c0909341SAndroid Build Coastguard Worker zip1 v2.8h, v18.8h, v19.8h 284*c0909341SAndroid Build Coastguard Worker zip1 v18.8h, v20.8h, v21.8h 285*c0909341SAndroid Build Coastguard Worker zip1 v24.8h, v22.8h, v23.8h 286*c0909341SAndroid Build Coastguard Worker 287*c0909341SAndroid Build Coastguard Worker zip1 v16.4s, v0.4s, v2.4s 288*c0909341SAndroid Build Coastguard Worker zip2 v19.4s, v0.4s, v2.4s 289*c0909341SAndroid Build Coastguard Worker zip1 v17.4s, v18.4s, v24.4s 290*c0909341SAndroid Build Coastguard Worker zip2 v20.4s, v18.4s, v24.4s 291*c0909341SAndroid Build Coastguard Worker 292*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 293*c0909341SAndroid Build Coastguard Worker4: 294*c0909341SAndroid Build Coastguard Worker ldr d18, [\src] 295*c0909341SAndroid Build Coastguard Worker ldr d24, [\src, \s_strd] 
296*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 297*c0909341SAndroid Build Coastguard Worker 298*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 299*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 300*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 301*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 302*c0909341SAndroid Build Coastguard Worker mov v21.16b, v18.16b 303*c0909341SAndroid Build Coastguard Worker mov v27.16b, v24.16b 304*c0909341SAndroid Build Coastguard Worker 305*c0909341SAndroid Build Coastguard Worker sdot z0.d, z16.h, z7.h[0] 306*c0909341SAndroid Build Coastguard Worker tbl v22.16b, {v16.16b, v17.16b}, v6.16b 307*c0909341SAndroid Build Coastguard Worker sdot z1.d, z19.h, z7.h[0] 308*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v19.16b, v20.16b}, v6.16b 309*c0909341SAndroid Build Coastguard Worker sdot z0.d, z17.h, z7.h[1] 310*c0909341SAndroid Build Coastguard Worker tbl v23.16b, {v17.16b, v18.16b}, v28.16b 311*c0909341SAndroid Build Coastguard Worker sdot z1.d, z20.h, z7.h[1] 312*c0909341SAndroid Build Coastguard Worker tbl v26.16b, {v20.16b, v21.16b}, v29.16b 313*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 314*c0909341SAndroid Build Coastguard Worker 315*c0909341SAndroid Build Coastguard Worker sdot z2.d, z22.h, z7.h[0] 316*c0909341SAndroid Build Coastguard Worker tbl v16.16b, {v22.16b, v23.16b}, v6.16b 317*c0909341SAndroid Build Coastguard Worker sdot z3.d, z25.h, z7.h[0] 318*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v25.16b, v26.16b}, v6.16b 319*c0909341SAndroid Build Coastguard Worker sdot z2.d, z23.h, z7.h[1] 320*c0909341SAndroid Build Coastguard Worker tbl v17.16b, {v23.16b, v24.16b}, v28.16b 321*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z7.h[1] 322*c0909341SAndroid Build Coastguard Worker tbl v20.16b, {v26.16b, v27.16b}, v29.16b 323*c0909341SAndroid Build Coastguard Worker 324*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, 
v0.4s, v1.4s 325*c0909341SAndroid Build Coastguard Worker uzp1 v1.4s, v2.4s, v3.4s 326*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 327*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 328*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v5.4s 329*c0909341SAndroid Build Coastguard Worker uzp1 v0.8h, v0.8h, v1.8h 330*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 331*c0909341SAndroid Build Coastguard Worker str q0, [\dst], #16 332*c0909341SAndroid Build Coastguard Worker.else // put 333*c0909341SAndroid Build Coastguard Worker sqrshrun v0.4h, v0.4s, #6 334*c0909341SAndroid Build Coastguard Worker sqrshrun v1.4h, v1.4s, #6 335*c0909341SAndroid Build Coastguard Worker umin v0.4h, v0.4h, v5.4h 336*c0909341SAndroid Build Coastguard Worker umin v1.4h, v1.4h, v5.4h 337*c0909341SAndroid Build Coastguard Worker st1h {z0.h}, p1, [\dst] 338*c0909341SAndroid Build Coastguard Worker st1h {z1.h}, p1, [\dst, \d_strd, lsl #1] 339*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd, lsl #2 340*c0909341SAndroid Build Coastguard Worker.endif 341*c0909341SAndroid Build Coastguard Worker b.gt 4b 342*c0909341SAndroid Build Coastguard Worker 343*c0909341SAndroid Build Coastguard Worker ldr d18, [\src] 344*c0909341SAndroid Build Coastguard Worker 345*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 346*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 347*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 348*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 349*c0909341SAndroid Build Coastguard Worker mov v21.16b, v18.16b 350*c0909341SAndroid Build Coastguard Worker 351*c0909341SAndroid Build Coastguard Worker sdot z0.d, z16.h, z7.h[0] 352*c0909341SAndroid Build Coastguard Worker tbl v22.16b, {v16.16b, v17.16b}, v6.16b 353*c0909341SAndroid Build Coastguard Worker sdot z1.d, z19.h, z7.h[0] 354*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v19.16b, v20.16b}, v6.16b 
355*c0909341SAndroid Build Coastguard Worker sdot z0.d, z17.h, z7.h[1] 356*c0909341SAndroid Build Coastguard Worker tbl v23.16b, {v17.16b, v18.16b}, v28.16b 357*c0909341SAndroid Build Coastguard Worker sdot z1.d, z20.h, z7.h[1] 358*c0909341SAndroid Build Coastguard Worker tbl v26.16b, {v20.16b, v21.16b}, v29.16b 359*c0909341SAndroid Build Coastguard Worker 360*c0909341SAndroid Build Coastguard Worker sdot z2.d, z22.h, z7.h[0] 361*c0909341SAndroid Build Coastguard Worker sdot z3.d, z25.h, z7.h[0] 362*c0909341SAndroid Build Coastguard Worker sdot z2.d, z23.h, z7.h[1] 363*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z7.h[1] 364*c0909341SAndroid Build Coastguard Worker 365*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 366*c0909341SAndroid Build Coastguard Worker uzp1 v1.4s, v2.4s, v3.4s 367*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 368*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 369*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v5.4s 370*c0909341SAndroid Build Coastguard Worker uzp1 v0.8h, v0.8h, v1.8h 371*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 372*c0909341SAndroid Build Coastguard Worker str q0, [\dst] 373*c0909341SAndroid Build Coastguard Worker.else // put 374*c0909341SAndroid Build Coastguard Worker sqrshrun v0.4h, v0.4s, #6 375*c0909341SAndroid Build Coastguard Worker sqrshrun v1.4h, v1.4s, #6 376*c0909341SAndroid Build Coastguard Worker umin v0.4h, v0.4h, v5.4h 377*c0909341SAndroid Build Coastguard Worker umin v1.4h, v1.4h, v5.4h 378*c0909341SAndroid Build Coastguard Worker st1h {z0.h}, p1, [\dst] 379*c0909341SAndroid Build Coastguard Worker st1h {z1.h}, p1, [\dst, \d_strd, lsl #1] 380*c0909341SAndroid Build Coastguard Worker.endif 381*c0909341SAndroid Build Coastguard Worker ret 382*c0909341SAndroid Build Coastguard Worker 383*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 384*c0909341SAndroid Build Coastguard Worker44: // V - 4x4, put 
only: 4x2, 2x4, 2x2 385*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 // src - s_strd 386*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 387*c0909341SAndroid Build Coastguard Worker 388*c0909341SAndroid Build Coastguard Worker ldr d16, [\src] 389*c0909341SAndroid Build Coastguard Worker ldr d17, [\src, \s_strd] 390*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 391*c0909341SAndroid Build Coastguard Worker ldr d18, [\src] 392*c0909341SAndroid Build Coastguard Worker ldr d19, [\src, \s_strd] 393*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 394*c0909341SAndroid Build Coastguard Worker 395*c0909341SAndroid Build Coastguard Worker ext v7.16b, v7.16b, v7.16b, #4 // [\xmy + 2 * 2] 396*c0909341SAndroid Build Coastguard Worker 397*c0909341SAndroid Build Coastguard Worker zip1 v0.8h, v16.8h, v17.8h 398*c0909341SAndroid Build Coastguard Worker zip1 v2.8h, v18.8h, v19.8h 399*c0909341SAndroid Build Coastguard Worker zip1 v16.4s, v0.4s, v2.4s 400*c0909341SAndroid Build Coastguard Worker zip2 v19.4s, v0.4s, v2.4s 401*c0909341SAndroid Build Coastguard Worker 402*c0909341SAndroid Build Coastguard Worker.ifc \type, put 403*c0909341SAndroid Build Coastguard Worker b.eq 42f 404*c0909341SAndroid Build Coastguard Worker.endif 405*c0909341SAndroid Build Coastguard Worker ldr d17, [\src] 406*c0909341SAndroid Build Coastguard Worker ldr d23, [\src, \s_strd] 407*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 408*c0909341SAndroid Build Coastguard Worker 409*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 410*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 411*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 412*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 413*c0909341SAndroid Build Coastguard Worker mov v20.16b, v17.16b 414*c0909341SAndroid Build Coastguard Worker mov v26.16b, v23.16b 415*c0909341SAndroid Build Coastguard Worker 
416*c0909341SAndroid Build Coastguard Worker sdot z0.d, z16.h, z7.h[0] 417*c0909341SAndroid Build Coastguard Worker tbl v22.16b, {v16.16b, v17.16b}, v28.16b 418*c0909341SAndroid Build Coastguard Worker sdot z1.d, z19.h, z7.h[0] 419*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v19.16b, v20.16b}, v29.16b 420*c0909341SAndroid Build Coastguard Worker sdot z2.d, z22.h, z7.h[0] 421*c0909341SAndroid Build Coastguard Worker tbl v16.16b, {v22.16b, v23.16b}, v28.16b 422*c0909341SAndroid Build Coastguard Worker sdot z3.d, z25.h, z7.h[0] 423*c0909341SAndroid Build Coastguard Worker tbl v19.16b, {v25.16b, v26.16b}, v29.16b 424*c0909341SAndroid Build Coastguard Worker 425*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 426*c0909341SAndroid Build Coastguard Worker uzp1 v1.4s, v2.4s, v3.4s 427*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 428*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 429*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v5.4s 430*c0909341SAndroid Build Coastguard Worker uzp1 v0.8h, v0.8h, v1.8h 431*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 432*c0909341SAndroid Build Coastguard Worker str q0, [\dst], #16 433*c0909341SAndroid Build Coastguard Worker.else // put 434*c0909341SAndroid Build Coastguard Worker sqrshrun v0.4h, v0.4s, #6 435*c0909341SAndroid Build Coastguard Worker sqrshrun v1.4h, v1.4s, #6 436*c0909341SAndroid Build Coastguard Worker umin v0.4h, v0.4h, v5.4h 437*c0909341SAndroid Build Coastguard Worker umin v1.4h, v1.4h, v5.4h 438*c0909341SAndroid Build Coastguard Worker st1h {z0.h}, p1, [\dst] 439*c0909341SAndroid Build Coastguard Worker st1h {z1.h}, p1, [\dst, \d_strd, lsl #1] 440*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd, lsl #2 441*c0909341SAndroid Build Coastguard Worker.endif 442*c0909341SAndroid Build Coastguard Worker 443*c0909341SAndroid Build Coastguard Worker.ifc \type, put 444*c0909341SAndroid Build Coastguard Worker 
.align JUMP_ALIGN 445*c0909341SAndroid Build Coastguard Worker42: 446*c0909341SAndroid Build Coastguard Worker.endif 447*c0909341SAndroid Build Coastguard Worker ldr d17, [\src] 448*c0909341SAndroid Build Coastguard Worker 449*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 450*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 451*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 452*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 453*c0909341SAndroid Build Coastguard Worker mov v20.16b, v17.16b 454*c0909341SAndroid Build Coastguard Worker 455*c0909341SAndroid Build Coastguard Worker sdot z0.d, z16.h, z7.h[0] 456*c0909341SAndroid Build Coastguard Worker tbl v22.16b, {v16.16b, v17.16b}, v28.16b 457*c0909341SAndroid Build Coastguard Worker sdot z1.d, z19.h, z7.h[0] 458*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v19.16b, v20.16b}, v29.16b 459*c0909341SAndroid Build Coastguard Worker 460*c0909341SAndroid Build Coastguard Worker sdot z2.d, z22.h, z7.h[0] 461*c0909341SAndroid Build Coastguard Worker sdot z3.d, z25.h, z7.h[0] 462*c0909341SAndroid Build Coastguard Worker 463*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 464*c0909341SAndroid Build Coastguard Worker uzp1 v1.4s, v2.4s, v3.4s 465*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 466*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 467*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v5.4s 468*c0909341SAndroid Build Coastguard Worker uzp1 v0.8h, v0.8h, v1.8h 469*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 470*c0909341SAndroid Build Coastguard Worker str q0, [\dst] 471*c0909341SAndroid Build Coastguard Worker.else // put 472*c0909341SAndroid Build Coastguard Worker sqrshrun v0.4h, v0.4s, #6 473*c0909341SAndroid Build Coastguard Worker sqrshrun v1.4h, v1.4s, #6 474*c0909341SAndroid Build Coastguard Worker umin v0.4h, v0.4h, v5.4h 475*c0909341SAndroid Build Coastguard Worker umin v1.4h, v1.4h, 
v5.4h 476*c0909341SAndroid Build Coastguard Worker st1h {z0.h}, p1, [\dst] 477*c0909341SAndroid Build Coastguard Worker st1h {z1.h}, p1, [\dst, \d_strd, lsl #1] 478*c0909341SAndroid Build Coastguard Worker.endif 479*c0909341SAndroid Build Coastguard Worker ret 480*c0909341SAndroid Build Coastguard Worker 481*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 482*c0909341SAndroid Build Coastguard WorkerL(\type\()_8tap_h_hv_\isa): 483*c0909341SAndroid Build Coastguard Worker madd \mx, \mx, w11, w9 484*c0909341SAndroid Build Coastguard Worker movrel x13, h_tbl_sve 485*c0909341SAndroid Build Coastguard Worker sub \src, \src, #6 // src - 3 * 2 486*c0909341SAndroid Build Coastguard Worker ubfx w9, \mx, #7, #7 487*c0909341SAndroid Build Coastguard Worker and \mx, \mx, #0x7F 488*c0909341SAndroid Build Coastguard Worker cmp \w, #4 489*c0909341SAndroid Build Coastguard Worker csel \mx, \mx, w9, le 490*c0909341SAndroid Build Coastguard Worker ldp q30, q31, [x13] 491*c0909341SAndroid Build Coastguard Worker add \xmx, x12, \xmx, lsl #3 // subpel H filter address 492*c0909341SAndroid Build Coastguard Worker cbz \my, L(\type\()_8tap_h_\isa) 493*c0909341SAndroid Build Coastguard Worker 494*c0909341SAndroid Build Coastguard Worker // HV cases 495*c0909341SAndroid Build Coastguard Worker madd w14, \my, w11, w10 496*c0909341SAndroid Build Coastguard Worker.ifc \bdmax, w8 497*c0909341SAndroid Build Coastguard Worker ldr \bdmax, [sp] 498*c0909341SAndroid Build Coastguard Worker.endif 499*c0909341SAndroid Build Coastguard Worker ubfx w11, w14, #7, #7 500*c0909341SAndroid Build Coastguard Worker and w14, w14, #0x7F 501*c0909341SAndroid Build Coastguard Worker ld1sb {z4.h}, p0/z, [\xmx] 502*c0909341SAndroid Build Coastguard Worker cmp \h, #4 503*c0909341SAndroid Build Coastguard Worker csel w14, w14, w11, le 504*c0909341SAndroid Build Coastguard Worker.ifc \type, put 505*c0909341SAndroid Build Coastguard Worker dup v29.8h, \bdmax 506*c0909341SAndroid Build Coastguard Worker.endif 
507*c0909341SAndroid Build Coastguard Worker clz \bdmax, \bdmax 508*c0909341SAndroid Build Coastguard Worker add \xmy, x12, x14, lsl #3 // subpel V filter address 509*c0909341SAndroid Build Coastguard Worker ld1sb {z7.h}, p0/z, [\xmy] 510*c0909341SAndroid Build Coastguard Worker.ifc \type, put 511*c0909341SAndroid Build Coastguard Worker mov w9, #12 512*c0909341SAndroid Build Coastguard Worker sub w9, w9, \bdmax 513*c0909341SAndroid Build Coastguard Worker dup v6.4s, w9 514*c0909341SAndroid Build Coastguard Worker.endif 515*c0909341SAndroid Build Coastguard Worker sub \bdmax, \bdmax, #24 516*c0909341SAndroid Build Coastguard Worker mov x15, x30 517*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd // src - s_strd - 3 * 2 518*c0909341SAndroid Build Coastguard Worker dup v5.4s, \bdmax 519*c0909341SAndroid Build Coastguard Worker cmp w10, SHARP1 520*c0909341SAndroid Build Coastguard Worker b.ne L(\type\()_6tap_hv_\isa) // vertical != SHARP1 521*c0909341SAndroid Build Coastguard Worker 522*c0909341SAndroid Build Coastguard Worker // HV 8-tap cases 523*c0909341SAndroid Build Coastguard Worker cmp \w, #4 524*c0909341SAndroid Build Coastguard Worker b.le 40f 525*c0909341SAndroid Build Coastguard Worker 526*c0909341SAndroid Build Coastguard Worker // .align JUMP_ALIGN // fallthrough 527*c0909341SAndroid Build Coastguard Worker80: // HV8 - 8xN+ 528*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 529*c0909341SAndroid Build Coastguard Worker add \wd_strd, \w, \w // d_strd = 2 * w 530*c0909341SAndroid Build Coastguard Worker.endif 531*c0909341SAndroid Build Coastguard Worker cmp \h, #4 532*c0909341SAndroid Build Coastguard Worker b.le 84f 533*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 // src - 3 * s_strd - 3 * 2 534*c0909341SAndroid Build Coastguard Worker 535*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 536*c0909341SAndroid Build Coastguard Worker81: 537*c0909341SAndroid Build Coastguard Worker mov \lsrc, 
\src 538*c0909341SAndroid Build Coastguard Worker mov \ldst, \dst 539*c0909341SAndroid Build Coastguard Worker mov w8, \h 540*c0909341SAndroid Build Coastguard Worker 541*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 542*c0909341SAndroid Build Coastguard Worker uzp1 v16.8h, v23.8h, v24.8h 543*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 544*c0909341SAndroid Build Coastguard Worker uzp1 v17.8h, v23.8h, v24.8h 545*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 546*c0909341SAndroid Build Coastguard Worker uzp1 v18.8h, v23.8h, v24.8h 547*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 548*c0909341SAndroid Build Coastguard Worker uzp1 v19.8h, v23.8h, v24.8h 549*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 550*c0909341SAndroid Build Coastguard Worker uzp1 v20.8h, v23.8h, v24.8h 551*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 552*c0909341SAndroid Build Coastguard Worker uzp1 v21.8h, v23.8h, v24.8h 553*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 554*c0909341SAndroid Build Coastguard Worker uzp1 v22.8h, v23.8h, v24.8h 555*c0909341SAndroid Build Coastguard Worker 556*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 557*c0909341SAndroid Build Coastguard Worker8: 558*c0909341SAndroid Build Coastguard Worker ldp q24, q28, [\lsrc] 559*c0909341SAndroid Build Coastguard Worker smull v0.4s, v16.4h, v7.h[0] 560*c0909341SAndroid Build Coastguard Worker smull2 v1.4s, v16.8h, v7.h[0] 561*c0909341SAndroid Build Coastguard Worker mov v16.16b, v17.16b 562*c0909341SAndroid Build Coastguard Worker 563*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 564*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 565*c0909341SAndroid Build Coastguard Worker tbl v23.16b, {v24.16b}, v30.16b 566*c0909341SAndroid Build Coastguard Worker tbl v24.16b, {v24.16b}, v31.16b 567*c0909341SAndroid Build 
Coastguard Worker 568*c0909341SAndroid Build Coastguard Worker ldur q26, [\lsrc, #8] 569*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v17.4h, v7.h[1] 570*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v17.8h, v7.h[1] 571*c0909341SAndroid Build Coastguard Worker mov v17.16b, v18.16b 572*c0909341SAndroid Build Coastguard Worker add \lsrc, \lsrc, \s_strd 573*c0909341SAndroid Build Coastguard Worker 574*c0909341SAndroid Build Coastguard Worker sdot z2.d, z23.h, z4.h[0] 575*c0909341SAndroid Build Coastguard Worker sdot z3.d, z24.h, z4.h[0] 576*c0909341SAndroid Build Coastguard Worker movi v23.2d, #0 577*c0909341SAndroid Build Coastguard Worker movi v24.2d, #0 578*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v26.16b}, v30.16b 579*c0909341SAndroid Build Coastguard Worker tbl v26.16b, {v26.16b}, v31.16b 580*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v18.4h, v7.h[2] 581*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v18.8h, v7.h[2] 582*c0909341SAndroid Build Coastguard Worker mov v18.16b, v19.16b 583*c0909341SAndroid Build Coastguard Worker 584*c0909341SAndroid Build Coastguard Worker sdot z23.d, z25.h, z4.h[0] 585*c0909341SAndroid Build Coastguard Worker sdot z24.d, z26.h, z4.h[0] 586*c0909341SAndroid Build Coastguard Worker tbl v27.16b, {v28.16b}, v30.16b 587*c0909341SAndroid Build Coastguard Worker tbl v28.16b, {v28.16b}, v31.16b 588*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v19.4h, v7.h[3] 589*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v19.8h, v7.h[3] 590*c0909341SAndroid Build Coastguard Worker mov v19.16b, v20.16b 591*c0909341SAndroid Build Coastguard Worker 592*c0909341SAndroid Build Coastguard Worker subs w8, w8, #1 593*c0909341SAndroid Build Coastguard Worker sdot z2.d, z25.h, z4.h[1] 594*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z4.h[1] 595*c0909341SAndroid Build Coastguard Worker sdot z23.d, z27.h, z4.h[1] 596*c0909341SAndroid Build Coastguard Worker sdot z24.d, z28.h, z4.h[1] 
597*c0909341SAndroid Build Coastguard Worker 598*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v20.4h, v7.h[4] 599*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v20.8h, v7.h[4] 600*c0909341SAndroid Build Coastguard Worker mov v20.16b, v21.16b 601*c0909341SAndroid Build Coastguard Worker 602*c0909341SAndroid Build Coastguard Worker uzp1 v3.4s, v2.4s, v3.4s 603*c0909341SAndroid Build Coastguard Worker uzp1 v24.4s, v23.4s, v24.4s 604*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v21.4h, v7.h[5] 605*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v21.8h, v7.h[5] 606*c0909341SAndroid Build Coastguard Worker mov v21.16b, v22.16b 607*c0909341SAndroid Build Coastguard Worker 608*c0909341SAndroid Build Coastguard Worker srshl v23.4s, v3.4s, v5.4s 609*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v5.4s 610*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v22.4h, v7.h[6] 611*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v22.8h, v7.h[6] 612*c0909341SAndroid Build Coastguard Worker 613*c0909341SAndroid Build Coastguard Worker uzp1 v22.8h, v23.8h, v24.8h 614*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v22.4h, v7.h[7] 615*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v22.8h, v7.h[7] 616*c0909341SAndroid Build Coastguard Worker 617*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 618*c0909341SAndroid Build Coastguard Worker rshrn v0.4h, v0.4s, #6 619*c0909341SAndroid Build Coastguard Worker rshrn2 v0.8h, v1.4s, #6 620*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 621*c0909341SAndroid Build Coastguard Worker.else // put 622*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v6.4s 623*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v6.4s 624*c0909341SAndroid Build Coastguard Worker sqxtun v0.4h, v0.4s 625*c0909341SAndroid Build Coastguard Worker sqxtun2 v0.8h, v1.4s 626*c0909341SAndroid Build Coastguard Worker umin v0.8h, v0.8h, v29.8h 
627*c0909341SAndroid Build Coastguard Worker.endif 628*c0909341SAndroid Build Coastguard Worker st1 {v0.8h}, [\ldst], \d_strd 629*c0909341SAndroid Build Coastguard Worker b.gt 8b 630*c0909341SAndroid Build Coastguard Worker 631*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 632*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 633*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 634*c0909341SAndroid Build Coastguard Worker b.gt 81b 635*c0909341SAndroid Build Coastguard Worker ret x15 636*c0909341SAndroid Build Coastguard Worker 637*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 638*c0909341SAndroid Build Coastguard Worker40: // H4V8 - 4xN, put only: 2xN (4-tap horizontal, 8-tap vertical) 639*c0909341SAndroid Build Coastguard Worker.ifc \type, put 640*c0909341SAndroid Build Coastguard Worker lsr \d_strd, \d_strd, #1 // hword index for `st1h` 641*c0909341SAndroid Build Coastguard Worker whilelt p1.h, wzr, \w // masking for writes 642*c0909341SAndroid Build Coastguard Worker.endif 643*c0909341SAndroid Build Coastguard Worker ext v4.16b, v4.16b, v4.16b, #4 // rotate by 2 hwords: H filter taps 2..5 (loaded from [\xmx]) -> v4.h[0..3] 644*c0909341SAndroid Build Coastguard Worker add \src, \src, #4 // src - s_strd - 1 * 2 645*c0909341SAndroid Build Coastguard Worker 646*c0909341SAndroid Build Coastguard Worker cmp \h, #4 647*c0909341SAndroid Build Coastguard Worker b.le 44f 648*c0909341SAndroid Build Coastguard Worker 649*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 // src - 3 * s_strd - 1 * 2 650*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 651*c0909341SAndroid Build Coastguard Worker xtn v16.4h, v0.4s 652*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 653*c0909341SAndroid Build Coastguard Worker xtn v17.4h, v0.4s 654*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 655*c0909341SAndroid Build Coastguard Worker xtn v18.4h, v0.4s 656*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 657*c0909341SAndroid Build Coastguard
Worker xtn v19.4h, v0.4s 658*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 659*c0909341SAndroid Build Coastguard Worker xtn v20.4h, v0.4s 660*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 661*c0909341SAndroid Build Coastguard Worker xtn v21.4h, v0.4s 662*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 663*c0909341SAndroid Build Coastguard Worker xtn v22.4h, v0.4s 664*c0909341SAndroid Build Coastguard Worker 665*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 666*c0909341SAndroid Build Coastguard Worker4: 667*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [\src], \s_strd 668*c0909341SAndroid Build Coastguard Worker 669*c0909341SAndroid Build Coastguard Worker smull v24.4s, v16.4h, v7.h[0] 670*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v17.4h, v7.h[1] 671*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v3.16b}, v30.16b 672*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v3.16b}, v31.16b 673*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 674*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 675*c0909341SAndroid Build Coastguard Worker mov v16.16b, v17.16b 676*c0909341SAndroid Build Coastguard Worker mov v17.16b, v18.16b 677*c0909341SAndroid Build Coastguard Worker 678*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v18.4h, v7.h[2] 679*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v19.4h, v7.h[3] 680*c0909341SAndroid Build Coastguard Worker sdot z0.d, z2.h, z4.h[0] 681*c0909341SAndroid Build Coastguard Worker sdot z1.d, z3.h, z4.h[0] 682*c0909341SAndroid Build Coastguard Worker mov v18.16b, v19.16b 683*c0909341SAndroid Build Coastguard Worker mov v19.16b, v20.16b 684*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 685*c0909341SAndroid Build Coastguard Worker 686*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v20.4h, v7.h[4] 687*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v21.4h, 
v7.h[5] 688*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 689*c0909341SAndroid Build Coastguard Worker mov v20.16b, v21.16b 690*c0909341SAndroid Build Coastguard Worker mov v21.16b, v22.16b 691*c0909341SAndroid Build Coastguard Worker 692*c0909341SAndroid Build Coastguard Worker subs \h, \h, #1 693*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v22.4h, v7.h[6] 694*c0909341SAndroid Build Coastguard Worker xtn v22.4h, v0.4s 695*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v22.4h, v7.h[7] 696*c0909341SAndroid Build Coastguard Worker 697*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 698*c0909341SAndroid Build Coastguard Worker rshrn v0.4h, v24.4s, #6 699*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 700*c0909341SAndroid Build Coastguard Worker str d0, [\dst], #8 701*c0909341SAndroid Build Coastguard Worker.else // put 702*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v24.4s, v6.4s 703*c0909341SAndroid Build Coastguard Worker sqxtun v0.4h, v0.4s 704*c0909341SAndroid Build Coastguard Worker umin v0.4h, v0.4h, v29.4h 705*c0909341SAndroid Build Coastguard Worker st1h {z0.h}, p1, [\dst] 706*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd, lsl #1 707*c0909341SAndroid Build Coastguard Worker.endif 708*c0909341SAndroid Build Coastguard Worker b.gt 4b 709*c0909341SAndroid Build Coastguard Worker ret x15 710*c0909341SAndroid Build Coastguard Worker 711*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 712*c0909341SAndroid Build Coastguard WorkerL(\type\()_6tap_hv_\isa): 713*c0909341SAndroid Build Coastguard Worker cmp \w, #4 714*c0909341SAndroid Build Coastguard Worker b.le 46f 715*c0909341SAndroid Build Coastguard Worker 716*c0909341SAndroid Build Coastguard Worker // .align JUMP_ALIGN // fallthrough 717*c0909341SAndroid Build Coastguard Worker80: // HV6 - 8xN+ 718*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 719*c0909341SAndroid Build Coastguard Worker add 
\wd_strd, \w, \w // d_strd = 2 * w 720*c0909341SAndroid Build Coastguard Worker.endif 721*c0909341SAndroid Build Coastguard Worker cmp \h, #4 722*c0909341SAndroid Build Coastguard Worker b.le 84f 723*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd // src - 2 * s_strd - 3 * 2 724*c0909341SAndroid Build Coastguard Worker 725*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 726*c0909341SAndroid Build Coastguard Worker81: 727*c0909341SAndroid Build Coastguard Worker mov \lsrc, \src 728*c0909341SAndroid Build Coastguard Worker mov \ldst, \dst 729*c0909341SAndroid Build Coastguard Worker mov w8, \h 730*c0909341SAndroid Build Coastguard Worker 731*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 732*c0909341SAndroid Build Coastguard Worker uzp1 v16.8h, v23.8h, v24.8h 733*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 734*c0909341SAndroid Build Coastguard Worker uzp1 v17.8h, v23.8h, v24.8h 735*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 736*c0909341SAndroid Build Coastguard Worker uzp1 v18.8h, v23.8h, v24.8h 737*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 738*c0909341SAndroid Build Coastguard Worker uzp1 v19.8h, v23.8h, v24.8h 739*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 740*c0909341SAndroid Build Coastguard Worker uzp1 v20.8h, v23.8h, v24.8h 741*c0909341SAndroid Build Coastguard Worker 742*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 743*c0909341SAndroid Build Coastguard Worker8: 744*c0909341SAndroid Build Coastguard Worker ldp q24, q28, [\lsrc] 745*c0909341SAndroid Build Coastguard Worker 746*c0909341SAndroid Build Coastguard Worker smull v0.4s, v16.4h, v7.h[1] 747*c0909341SAndroid Build Coastguard Worker smull2 v1.4s, v16.8h, v7.h[1] 748*c0909341SAndroid Build Coastguard Worker mov v16.16b, v17.16b 749*c0909341SAndroid Build Coastguard Worker 750*c0909341SAndroid Build Coastguard Worker tbl 
v23.16b, {v24.16b}, v30.16b 751*c0909341SAndroid Build Coastguard Worker tbl v24.16b, {v24.16b}, v31.16b 752*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 753*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 754*c0909341SAndroid Build Coastguard Worker 755*c0909341SAndroid Build Coastguard Worker ldur q26, [\lsrc, #8] 756*c0909341SAndroid Build Coastguard Worker add \lsrc, \lsrc, \s_strd 757*c0909341SAndroid Build Coastguard Worker 758*c0909341SAndroid Build Coastguard Worker sdot z2.d, z23.h, z4.h[0] 759*c0909341SAndroid Build Coastguard Worker sdot z3.d, z24.h, z4.h[0] 760*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v26.16b}, v30.16b 761*c0909341SAndroid Build Coastguard Worker tbl v26.16b, {v26.16b}, v31.16b 762*c0909341SAndroid Build Coastguard Worker movi v23.2d, #0 763*c0909341SAndroid Build Coastguard Worker movi v24.2d, #0 764*c0909341SAndroid Build Coastguard Worker 765*c0909341SAndroid Build Coastguard Worker sdot z23.d, z25.h, z4.h[0] 766*c0909341SAndroid Build Coastguard Worker sdot z24.d, z26.h, z4.h[0] 767*c0909341SAndroid Build Coastguard Worker tbl v27.16b, {v28.16b}, v30.16b 768*c0909341SAndroid Build Coastguard Worker tbl v28.16b, {v28.16b}, v31.16b 769*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v17.4h, v7.h[2] 770*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v17.8h, v7.h[2] 771*c0909341SAndroid Build Coastguard Worker mov v17.16b, v18.16b 772*c0909341SAndroid Build Coastguard Worker 773*c0909341SAndroid Build Coastguard Worker sdot z2.d, z25.h, z4.h[1] 774*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z4.h[1] 775*c0909341SAndroid Build Coastguard Worker sdot z23.d, z27.h, z4.h[1] 776*c0909341SAndroid Build Coastguard Worker sdot z24.d, z28.h, z4.h[1] 777*c0909341SAndroid Build Coastguard Worker 778*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v18.4h, v7.h[3] 779*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v18.8h, v7.h[3] 780*c0909341SAndroid Build Coastguard Worker 
mov v18.16b, v19.16b 781*c0909341SAndroid Build Coastguard Worker 782*c0909341SAndroid Build Coastguard Worker uzp1 v3.4s, v2.4s, v3.4s 783*c0909341SAndroid Build Coastguard Worker uzp1 v24.4s, v23.4s, v24.4s 784*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v19.4h, v7.h[4] 785*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v19.8h, v7.h[4] 786*c0909341SAndroid Build Coastguard Worker mov v19.16b, v20.16b 787*c0909341SAndroid Build Coastguard Worker 788*c0909341SAndroid Build Coastguard Worker srshl v23.4s, v3.4s, v5.4s 789*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v5.4s 790*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v20.4h, v7.h[5] 791*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v20.8h, v7.h[5] 792*c0909341SAndroid Build Coastguard Worker 793*c0909341SAndroid Build Coastguard Worker subs w8, w8, #1 794*c0909341SAndroid Build Coastguard Worker uzp1 v20.8h, v23.8h, v24.8h 795*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v20.4h, v7.h[6] 796*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v20.8h, v7.h[6] 797*c0909341SAndroid Build Coastguard Worker 798*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 799*c0909341SAndroid Build Coastguard Worker rshrn v0.4h, v0.4s, #6 800*c0909341SAndroid Build Coastguard Worker rshrn2 v0.8h, v1.4s, #6 801*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 802*c0909341SAndroid Build Coastguard Worker.else // put 803*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v6.4s 804*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v6.4s 805*c0909341SAndroid Build Coastguard Worker sqxtun v0.4h, v0.4s 806*c0909341SAndroid Build Coastguard Worker sqxtun2 v0.8h, v1.4s 807*c0909341SAndroid Build Coastguard Worker umin v0.8h, v0.8h, v29.8h 808*c0909341SAndroid Build Coastguard Worker.endif 809*c0909341SAndroid Build Coastguard Worker st1 {v0.8h}, [\ldst], \d_strd 810*c0909341SAndroid Build Coastguard Worker b.gt 8b 
811*c0909341SAndroid Build Coastguard Worker 812*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 813*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 814*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 815*c0909341SAndroid Build Coastguard Worker b.gt 81b 816*c0909341SAndroid Build Coastguard Worker ret x15 817*c0909341SAndroid Build Coastguard Worker 818*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 819*c0909341SAndroid Build Coastguard Worker84: // HV4 - 8x4, 8x2 820*c0909341SAndroid Build Coastguard Worker mov \lsrc, \src 821*c0909341SAndroid Build Coastguard Worker mov \ldst, \dst 822*c0909341SAndroid Build Coastguard Worker mov w8, \h 823*c0909341SAndroid Build Coastguard Worker 824*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 825*c0909341SAndroid Build Coastguard Worker uzp1 v17.8h, v23.8h, v24.8h 826*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 827*c0909341SAndroid Build Coastguard Worker uzp1 v18.8h, v23.8h, v24.8h 828*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter8_\isa) 829*c0909341SAndroid Build Coastguard Worker uzp1 v19.8h, v23.8h, v24.8h 830*c0909341SAndroid Build Coastguard Worker 831*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 832*c0909341SAndroid Build Coastguard Worker81: 833*c0909341SAndroid Build Coastguard Worker ldp q24, q28, [\lsrc] 834*c0909341SAndroid Build Coastguard Worker ldur q26, [\lsrc, #8] 835*c0909341SAndroid Build Coastguard Worker add \lsrc, \lsrc, \s_strd 836*c0909341SAndroid Build Coastguard Worker 837*c0909341SAndroid Build Coastguard Worker tbl v23.16b, {v24.16b}, v30.16b 838*c0909341SAndroid Build Coastguard Worker tbl v24.16b, {v24.16b}, v31.16b 839*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 840*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 841*c0909341SAndroid Build Coastguard Worker sdot z2.d, z23.h, z4.h[0] 842*c0909341SAndroid Build Coastguard Worker sdot z3.d, 
z24.h, z4.h[0] 843*c0909341SAndroid Build Coastguard Worker 844*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v26.16b}, v30.16b 845*c0909341SAndroid Build Coastguard Worker tbl v26.16b, {v26.16b}, v31.16b 846*c0909341SAndroid Build Coastguard Worker movi v23.2d, #0 847*c0909341SAndroid Build Coastguard Worker movi v24.2d, #0 848*c0909341SAndroid Build Coastguard Worker sdot z23.d, z25.h, z4.h[0] 849*c0909341SAndroid Build Coastguard Worker sdot z24.d, z26.h, z4.h[0] 850*c0909341SAndroid Build Coastguard Worker 851*c0909341SAndroid Build Coastguard Worker tbl v27.16b, {v28.16b}, v30.16b 852*c0909341SAndroid Build Coastguard Worker tbl v28.16b, {v28.16b}, v31.16b 853*c0909341SAndroid Build Coastguard Worker sdot z2.d, z25.h, z4.h[1] 854*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z4.h[1] 855*c0909341SAndroid Build Coastguard Worker sdot z23.d, z27.h, z4.h[1] 856*c0909341SAndroid Build Coastguard Worker sdot z24.d, z28.h, z4.h[1] 857*c0909341SAndroid Build Coastguard Worker 858*c0909341SAndroid Build Coastguard Worker smull v0.4s, v17.4h, v7.h[2] 859*c0909341SAndroid Build Coastguard Worker smull2 v1.4s, v17.8h, v7.h[2] 860*c0909341SAndroid Build Coastguard Worker mov v17.16b, v18.16b 861*c0909341SAndroid Build Coastguard Worker 862*c0909341SAndroid Build Coastguard Worker subs w8, w8, #1 863*c0909341SAndroid Build Coastguard Worker uzp1 v3.4s, v2.4s, v3.4s 864*c0909341SAndroid Build Coastguard Worker uzp1 v24.4s, v23.4s, v24.4s 865*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v18.4h, v7.h[3] 866*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v18.8h, v7.h[3] 867*c0909341SAndroid Build Coastguard Worker mov v18.16b, v19.16b 868*c0909341SAndroid Build Coastguard Worker 869*c0909341SAndroid Build Coastguard Worker srshl v23.4s, v3.4s, v5.4s 870*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v5.4s 871*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v19.4h, v7.h[4] 872*c0909341SAndroid Build Coastguard Worker 
smlal2 v1.4s, v19.8h, v7.h[4] 873*c0909341SAndroid Build Coastguard Worker 874*c0909341SAndroid Build Coastguard Worker uzp1 v19.8h, v23.8h, v24.8h 875*c0909341SAndroid Build Coastguard Worker smlal v0.4s, v19.4h, v7.h[5] 876*c0909341SAndroid Build Coastguard Worker smlal2 v1.4s, v19.8h, v7.h[5] 877*c0909341SAndroid Build Coastguard Worker 878*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 879*c0909341SAndroid Build Coastguard Worker rshrn v0.4h, v0.4s, #6 880*c0909341SAndroid Build Coastguard Worker rshrn2 v0.8h, v1.4s, #6 881*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 882*c0909341SAndroid Build Coastguard Worker.else // put 883*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v6.4s 884*c0909341SAndroid Build Coastguard Worker srshl v1.4s, v1.4s, v6.4s 885*c0909341SAndroid Build Coastguard Worker sqxtun v0.4h, v0.4s 886*c0909341SAndroid Build Coastguard Worker sqxtun2 v0.8h, v1.4s 887*c0909341SAndroid Build Coastguard Worker umin v0.8h, v0.8h, v29.8h 888*c0909341SAndroid Build Coastguard Worker.endif 889*c0909341SAndroid Build Coastguard Worker st1 {v0.8h}, [\ldst], \d_strd 890*c0909341SAndroid Build Coastguard Worker b.gt 81b 891*c0909341SAndroid Build Coastguard Worker 892*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 893*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 894*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 895*c0909341SAndroid Build Coastguard Worker b.gt 84b 896*c0909341SAndroid Build Coastguard Worker ret x15 897*c0909341SAndroid Build Coastguard Worker 898*c0909341SAndroid Build Coastguard Worker .align FUNC_ALIGN 899*c0909341SAndroid Build Coastguard WorkerL(\type\()_hv_filter8_\isa): 900*c0909341SAndroid Build Coastguard Worker ldp q24, q28, [\lsrc] 901*c0909341SAndroid Build Coastguard Worker ldur q26, [\lsrc, #8] 902*c0909341SAndroid Build Coastguard Worker add \lsrc, \lsrc, \s_strd 903*c0909341SAndroid Build Coastguard Worker 904*c0909341SAndroid Build 
Coastguard Worker tbl v23.16b, {v24.16b}, v30.16b 905*c0909341SAndroid Build Coastguard Worker tbl v24.16b, {v24.16b}, v31.16b 906*c0909341SAndroid Build Coastguard Worker movi v2.2d, #0 907*c0909341SAndroid Build Coastguard Worker movi v3.2d, #0 908*c0909341SAndroid Build Coastguard Worker sdot z2.d, z23.h, z4.h[0] 909*c0909341SAndroid Build Coastguard Worker sdot z3.d, z24.h, z4.h[0] 910*c0909341SAndroid Build Coastguard Worker 911*c0909341SAndroid Build Coastguard Worker tbl v25.16b, {v26.16b}, v30.16b 912*c0909341SAndroid Build Coastguard Worker tbl v26.16b, {v26.16b}, v31.16b 913*c0909341SAndroid Build Coastguard Worker movi v23.2d, #0 914*c0909341SAndroid Build Coastguard Worker movi v24.2d, #0 915*c0909341SAndroid Build Coastguard Worker sdot z23.d, z25.h, z4.h[0] 916*c0909341SAndroid Build Coastguard Worker sdot z24.d, z26.h, z4.h[0] 917*c0909341SAndroid Build Coastguard Worker 918*c0909341SAndroid Build Coastguard Worker tbl v27.16b, {v28.16b}, v30.16b 919*c0909341SAndroid Build Coastguard Worker tbl v28.16b, {v28.16b}, v31.16b 920*c0909341SAndroid Build Coastguard Worker sdot z2.d, z25.h, z4.h[1] 921*c0909341SAndroid Build Coastguard Worker sdot z3.d, z26.h, z4.h[1] 922*c0909341SAndroid Build Coastguard Worker sdot z23.d, z27.h, z4.h[1] 923*c0909341SAndroid Build Coastguard Worker sdot z24.d, z28.h, z4.h[1] 924*c0909341SAndroid Build Coastguard Worker 925*c0909341SAndroid Build Coastguard Worker uzp1 v3.4s, v2.4s, v3.4s 926*c0909341SAndroid Build Coastguard Worker uzp1 v24.4s, v23.4s, v24.4s 927*c0909341SAndroid Build Coastguard Worker srshl v23.4s, v3.4s, v5.4s 928*c0909341SAndroid Build Coastguard Worker srshl v24.4s, v24.4s, v5.4s 929*c0909341SAndroid Build Coastguard Worker ret 930*c0909341SAndroid Build Coastguard Worker 931*c0909341SAndroid Build Coastguard Worker .align FUNC_ALIGN 932*c0909341SAndroid Build Coastguard WorkerL(\type\()_hv_filter4_\isa): 933*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [\src], \s_strd 934*c0909341SAndroid 
Build Coastguard Worker 935*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v3.16b}, v30.16b 936*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v3.16b}, v31.16b 937*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 938*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 939*c0909341SAndroid Build Coastguard Worker sdot z0.d, z2.h, z4.h[0] 940*c0909341SAndroid Build Coastguard Worker sdot z1.d, z3.h, z4.h[0] 941*c0909341SAndroid Build Coastguard Worker 942*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 943*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 944*c0909341SAndroid Build Coastguard Worker ret 945*c0909341SAndroid Build Coastguard Worker 946*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 947*c0909341SAndroid Build Coastguard Worker46: // H4V6 - 4xN, put only: 2xN (4-tap horizontal, 6-tap vertical) 948*c0909341SAndroid Build Coastguard Worker.ifc \type, put 949*c0909341SAndroid Build Coastguard Worker lsr \d_strd, \d_strd, #1 // hword index for `st1h` 950*c0909341SAndroid Build Coastguard Worker whilelt p1.h, wzr, \w // masking for writes 951*c0909341SAndroid Build Coastguard Worker.endif 952*c0909341SAndroid Build Coastguard Worker ext v4.16b, v4.16b, v4.16b, #4 // rotate by 2 hwords: H filter taps 2..5 (loaded from [\xmx]) -> v4.h[0..3] 953*c0909341SAndroid Build Coastguard Worker add \src, \src, #4 // src - s_strd - 1 * 2 954*c0909341SAndroid Build Coastguard Worker 955*c0909341SAndroid Build Coastguard Worker cmp \h, #4 956*c0909341SAndroid Build Coastguard Worker b.le 44f 957*c0909341SAndroid Build Coastguard Worker 958*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd // src - 2 * s_strd - 1 * 2 959*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 960*c0909341SAndroid Build Coastguard Worker xtn v16.4h, v0.4s 961*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 962*c0909341SAndroid Build Coastguard Worker xtn v17.4h, v0.4s 963*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 964*c0909341SAndroid Build Coastguard
Worker xtn v18.4h, v0.4s 965*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 966*c0909341SAndroid Build Coastguard Worker xtn v19.4h, v0.4s 967*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 968*c0909341SAndroid Build Coastguard Worker xtn v20.4h, v0.4s 969*c0909341SAndroid Build Coastguard Worker 970*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 971*c0909341SAndroid Build Coastguard Worker4: 972*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [\src], \s_strd 973*c0909341SAndroid Build Coastguard Worker smull v24.4s, v16.4h, v7.h[1] 974*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v17.4h, v7.h[2] 975*c0909341SAndroid Build Coastguard Worker 976*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v3.16b}, v30.16b 977*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v3.16b}, v31.16b 978*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 979*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 980*c0909341SAndroid Build Coastguard Worker sdot z0.d, z2.h, z4.h[0] 981*c0909341SAndroid Build Coastguard Worker sdot z1.d, z3.h, z4.h[0] 982*c0909341SAndroid Build Coastguard Worker 983*c0909341SAndroid Build Coastguard Worker mov v16.16b, v17.16b 984*c0909341SAndroid Build Coastguard Worker mov v17.16b, v18.16b 985*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v18.4h, v7.h[3] 986*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v19.4h, v7.h[4] 987*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 988*c0909341SAndroid Build Coastguard Worker 989*c0909341SAndroid Build Coastguard Worker mov v18.16b, v19.16b 990*c0909341SAndroid Build Coastguard Worker mov v19.16b, v20.16b 991*c0909341SAndroid Build Coastguard Worker subs \h, \h, #1 992*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 993*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v20.4h, v7.h[5] 994*c0909341SAndroid Build Coastguard Worker xtn v20.4h, v0.4s 
995*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v20.4h, v7.h[6] 996*c0909341SAndroid Build Coastguard Worker 997*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 998*c0909341SAndroid Build Coastguard Worker rshrn v0.4h, v24.4s, #6 999*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 1000*c0909341SAndroid Build Coastguard Worker str d0, [\dst], #8 1001*c0909341SAndroid Build Coastguard Worker.else // put 1002*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v24.4s, v6.4s 1003*c0909341SAndroid Build Coastguard Worker sqxtun v0.4h, v0.4s 1004*c0909341SAndroid Build Coastguard Worker umin v0.4h, v0.4h, v29.4h 1005*c0909341SAndroid Build Coastguard Worker st1h {z0.h}, p1, [\dst] 1006*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd, lsl #1 1007*c0909341SAndroid Build Coastguard Worker.endif 1008*c0909341SAndroid Build Coastguard Worker b.gt 4b 1009*c0909341SAndroid Build Coastguard Worker ret x15 1010*c0909341SAndroid Build Coastguard Worker 1011*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 1012*c0909341SAndroid Build Coastguard Worker44: // H4V4 - 4x4, put only: 4x2, 2x4, 2x2 1013*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 1014*c0909341SAndroid Build Coastguard Worker xtn v17.4h, v0.4s 1015*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 1016*c0909341SAndroid Build Coastguard Worker xtn v18.4h, v0.4s 1017*c0909341SAndroid Build Coastguard Worker bl L(\type\()_hv_filter4_\isa) 1018*c0909341SAndroid Build Coastguard Worker xtn v19.4h, v0.4s 1019*c0909341SAndroid Build Coastguard Worker 1020*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 1021*c0909341SAndroid Build Coastguard Worker4: 1022*c0909341SAndroid Build Coastguard Worker ld1 {v3.16b}, [\src], \s_strd 1023*c0909341SAndroid Build Coastguard Worker smull v24.4s, v17.4h, v7.h[2] 1024*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v18.4h, v7.h[3] 1025*c0909341SAndroid Build 
Coastguard Worker 1026*c0909341SAndroid Build Coastguard Worker tbl v2.16b, {v3.16b}, v30.16b 1027*c0909341SAndroid Build Coastguard Worker tbl v3.16b, {v3.16b}, v31.16b 1028*c0909341SAndroid Build Coastguard Worker movi v0.2d, #0 1029*c0909341SAndroid Build Coastguard Worker movi v1.2d, #0 1030*c0909341SAndroid Build Coastguard Worker sdot z0.d, z2.h, z4.h[0] 1031*c0909341SAndroid Build Coastguard Worker sdot z1.d, z3.h, z4.h[0] 1032*c0909341SAndroid Build Coastguard Worker uzp1 v0.4s, v0.4s, v1.4s 1033*c0909341SAndroid Build Coastguard Worker 1034*c0909341SAndroid Build Coastguard Worker mov v17.16b, v18.16b 1035*c0909341SAndroid Build Coastguard Worker mov v18.16b, v19.16b 1036*c0909341SAndroid Build Coastguard Worker subs \h, \h, #1 1037*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v0.4s, v5.4s 1038*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v19.4h, v7.h[4] 1039*c0909341SAndroid Build Coastguard Worker xtn v19.4h, v0.4s 1040*c0909341SAndroid Build Coastguard Worker smlal v24.4s, v19.4h, v7.h[5] 1041*c0909341SAndroid Build Coastguard Worker 1042*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 1043*c0909341SAndroid Build Coastguard Worker rshrn v0.4h, v24.4s, #6 1044*c0909341SAndroid Build Coastguard Worker sub z0.h, z0.h, #PREP_BIAS 1045*c0909341SAndroid Build Coastguard Worker str d0, [\dst], #8 1046*c0909341SAndroid Build Coastguard Worker.else // put 1047*c0909341SAndroid Build Coastguard Worker srshl v0.4s, v24.4s, v6.4s 1048*c0909341SAndroid Build Coastguard Worker sqxtun v0.4h, v0.4s 1049*c0909341SAndroid Build Coastguard Worker umin v0.4h, v0.4h, v29.4h 1050*c0909341SAndroid Build Coastguard Worker st1h {z0.h}, p1, [\dst] 1051*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd, lsl #1 1052*c0909341SAndroid Build Coastguard Worker.endif 1053*c0909341SAndroid Build Coastguard Worker b.gt 4b 1054*c0909341SAndroid Build Coastguard Worker ret x15 1055*c0909341SAndroid Build Coastguard Worker 1056*c0909341SAndroid 
// Horizontal-only 8-tap path. Sets up the rounding/shift constants, loads
// the horizontal coefficients and dispatches on block width through the
// jump table emitted after `endfunc`.
//   x8      index into the jump table (scaled by 4, 32-bit signed offsets)
//   \xmx    address of the horizontal filter coefficients (signed bytes)
//   v30/v31 byte shuffle index vectors, set up outside this span
//   p0      governing predicate of the coefficient load, set up outside
//           this span
        .align JUMP_ALIGN
L(\type\()_8tap_h_\isa):
        movrel          x11, \type\()_8tap_h_\isa\()_tbl
        ldrsw           x12, [x11, x8, lsl #2]      // read the offset before w8 may be reused below
.ifc \bdmax, w8
        ldr             \bdmax, [sp]                // \bdmax is taken from the stack in this case
.endif
.ifc \type, prep
        // v5 = clz(bdmax) - 24, used as the srshl count below. A negative
        // count makes srshl a rounding right shift (e.g. -4 for bdmax = 0xFFF).
        clz             \bdmax, \bdmax
        sub             \bdmax, \bdmax, #24
        dup             v5.4s, \bdmax
.else   // put
        mov             w9, #34             // rounding for 10-bit case
        mov             w10, #40            // rounding for 12-bit case
        cmp             \bdmax, #0xFFF
        csel            w9, w9, w10, ne     // select rounding based on \bdmax
        dup             v5.8h, \bdmax       // clamp value for the umin instructions below
        dup             v6.2d, x9           // initial value of every 64-bit accumulator
.endif
        add             x11, x11, x12
        ld1sb           {z4.h}, p0/z, [\xmx]        // coefficients, sign-extended to halfwords
        br              x11

        .align JUMP_ALIGN
// 4-wide path (2-wide for put): two rows per iteration, 4-tap filter.
20:     // H - 4xN, put only: 2xN
40:
        AARCH64_VALID_JUMP_TARGET
        add             \src, \src, #4      // src - 1 * 2
        // Rotate the coefficients by two halfwords so that group 0 used by
        // sdot holds coefficients 2..5 of the set loaded from \xmx.
        ext             v4.16b, v4.16b, v4.16b, #4  // [\xmx + 2 * 2]
.ifc \type, put
        lsr             \d_strd, \d_strd, #1        // hword index for `st1h`
        whilelt         p1.h, wzr, \w               // masking for writes
.endif
        .align LOOP_ALIGN
4:
        ldr             q17, [\src]                 // row 0
        ldr             q19, [\src, \s_strd]        // row 1
        add             \src, \src, \s_strd, lsl #1

.ifc \type, prep
        movi            v0.2d, #0
        movi            v1.2d, #0
        movi            v2.2d, #0
        movi            v3.2d, #0
.else
        mov             v0.16b, v6.16b              // put: start from the rounding constant, the
        mov             v1.16b, v6.16b              //   sqshrun below does not round by itself
        mov             v2.16b, v6.16b
        mov             v3.16b, v6.16b
.endif
        tbl             v16.16b, {v17.16b}, v30.16b
        tbl             v17.16b, {v17.16b}, v31.16b
        sdot            z0.d, z16.h, z4.h[0]        // each .d lane += dot product of 4 halfwords
        sdot            z1.d, z17.h, z4.h[0]
        subs            \h, \h, #2                  // flags are consumed by `b.gt 4b` below
        tbl             v18.16b, {v19.16b}, v30.16b
        tbl             v19.16b, {v19.16b}, v31.16b
        sdot            z2.d, z18.h, z4.h[0]
        sdot            z3.d, z19.h, z4.h[0]

        uzp1            v0.4s, v0.4s, v1.4s         // row 0: low 32 bits of every 64-bit sum
        uzp1            v1.4s, v2.4s, v3.4s         // row 1
.ifc \type, prep
        srshl           v0.4s, v0.4s, v5.4s
        srshl           v1.4s, v1.4s, v5.4s
        uzp1            v0.8h, v0.8h, v1.8h         // narrow both rows into one vector
        sub             z0.h, z0.h, #PREP_BIAS      // subtract the 8192 bias
        str             q0, [\dst], #16
.else   // put
        sqshrun         v0.4h, v0.4s, #6
        sqshrun         v1.4h, v1.4s, #6
        umin            v0.4h, v0.4h, v5.4h         // clamp to \bdmax
        umin            v1.4h, v1.4h, v5.4h
        st1h            {z0.h}, p1, [\dst]
        st1h            {z1.h}, p1, [\dst, \d_strd, lsl #1] // halved stride scaled back to bytes
        add             \dst, \dst, \d_strd, lsl #2 // advance two rows
.endif
        b.gt            4b
        ret

        .align JUMP_ALIGN
// 8-wide path: two rows per iteration, full 8-tap filter. Coefficient
// group 0 (z4.h[0]) and group 1 (z4.h[1]) are applied to source data taken
// 8 bytes apart.
80:     // H - 8xN
        AARCH64_VALID_JUMP_TARGET

        .align LOOP_ALIGN
8:
        ldp             q17, q21, [\src]            // row 0: bytes 0..15 and 16..31
        ldur            q19, [\src, #8]             // row 0: bytes 8..23

.ifc \type, prep
        movi            v0.2d, #0
        movi            v2.2d, #0
.else
        mov             v0.16b, v6.16b
        mov             v2.16b, v6.16b
.endif
        tbl             v16.16b, {v17.16b}, v30.16b
        tbl             v17.16b, {v17.16b}, v31.16b
        add             \src, \src, \s_strd
        sdot            z0.d, z16.h, z4.h[0]
        sdot            z2.d, z17.h, z4.h[0]

        tbl             v18.16b, {v19.16b}, v30.16b
        tbl             v19.16b, {v19.16b}, v31.16b
.ifc \type, prep
        movi            v16.2d, #0                  // v16/v17 are reused as accumulators from here
        movi            v17.2d, #0
.else
        mov             v16.16b, v6.16b
        mov             v17.16b, v6.16b
.endif
        ldp             q23, q27, [\src]            // row 1: bytes 0..15 and 16..31
        ldur            q25, [\src, #8]             // row 1: bytes 8..23

        sdot            z16.d, z18.h, z4.h[0]
        sdot            z17.d, z19.h, z4.h[0]

        tbl             v22.16b, {v23.16b}, v30.16b
        tbl             v23.16b, {v23.16b}, v31.16b
.ifc \type, prep
        movi            v1.2d, #0
        movi            v3.2d, #0
.else
        mov             v1.16b, v6.16b
        mov             v3.16b, v6.16b
.endif
        add             \src, \src, \s_strd
        sdot            z1.d, z22.h, z4.h[0]
        sdot            z3.d, z23.h, z4.h[0]

        tbl             v24.16b, {v25.16b}, v30.16b
        tbl             v25.16b, {v25.16b}, v31.16b
.ifc \type, prep
        movi            v22.2d, #0                  // v22/v23 are reused as accumulators from here
        movi            v23.2d, #0
.else
        mov             v22.16b, v6.16b
        mov             v23.16b, v6.16b
.endif
        sdot            z22.d, z24.h, z4.h[0]
        sdot            z23.d, z25.h, z4.h[0]

        // Second half of the filter (coefficient group 1) for all sums.
        tbl             v20.16b, {v21.16b}, v30.16b
        tbl             v21.16b, {v21.16b}, v31.16b
        sdot            z0.d, z18.h, z4.h[1]
        sdot            z2.d, z19.h, z4.h[1]
        tbl             v26.16b, {v27.16b}, v30.16b
        tbl             v27.16b, {v27.16b}, v31.16b
        sdot            z16.d, z20.h, z4.h[1]
        sdot            z17.d, z21.h, z4.h[1]

        sdot            z1.d, z24.h, z4.h[1]
        sdot            z3.d, z25.h, z4.h[1]

        sdot            z22.d, z26.h, z4.h[1]
        sdot            z23.d, z27.h, z4.h[1]

        subs            \h, \h, #2                  // flags are consumed by `b.gt 8b` below
        uzp1            v0.4s, v0.4s, v2.4s         // row 0, first four sums
        uzp1            v2.4s, v16.4s, v17.4s       // row 0, last four sums
        uzp1            v1.4s, v1.4s, v3.4s         // row 1, first four sums
        uzp1            v3.4s, v22.4s, v23.4s       // row 1, last four sums
.ifc \type, prep
        srshl           v0.4s, v0.4s, v5.4s
        srshl           v2.4s, v2.4s, v5.4s
        srshl           v1.4s, v1.4s, v5.4s
        srshl           v3.4s, v3.4s, v5.4s
        uzp1            v0.8h, v0.8h, v2.8h
        uzp1            v1.8h, v1.8h, v3.8h
        sub             z0.h, z0.h, #PREP_BIAS
        sub             z1.h, z1.h, #PREP_BIAS
        stp             q0, q1, [\dst], #32
.else   // put
        sqshrun         v0.4h, v0.4s, #6
        sqshrun2        v0.8h, v2.4s, #6
        sqshrun         v1.4h, v1.4s, #6
        sqshrun2        v1.8h, v3.4s, #6
        umin            v0.8h, v0.8h, v5.8h         // clamp to \bdmax
        umin            v1.8h, v1.8h, v5.8h
        st1             {v0.16b}, [\dst], \d_strd
        st1             {v1.16b}, [\dst], \d_strd
.endif
        b.gt            8b
        ret

        .align JUMP_ALIGN
// 16-wide path: one row per iteration. v0/v2 and v16/v17 accumulate the
// first eight sums, v1/v3 and v22/v23 the last eight.
160:    // H - 16xN
        AARCH64_VALID_JUMP_TARGET

        .align LOOP_ALIGN
16:
        ldp             q17, q21, [\src]            // bytes 0..15 and 16..31
        ldur            q19, [\src, #8]             // bytes 8..23

.ifc \type, prep
        movi            v0.2d, #0
        movi            v2.2d, #0
.else
        mov             v0.16b, v6.16b
        mov             v2.16b, v6.16b
.endif
        tbl             v16.16b, {v17.16b}, v30.16b
        tbl             v17.16b, {v17.16b}, v31.16b
        sdot            z0.d, z16.h, z4.h[0]
        sdot            z2.d, z17.h, z4.h[0]

        tbl             v18.16b, {v19.16b}, v30.16b
        tbl             v19.16b, {v19.16b}, v31.16b
.ifc \type, prep
        movi            v16.2d, #0
        movi            v17.2d, #0
.else
        mov             v16.16b, v6.16b
        mov             v17.16b, v6.16b
.endif
        ldur            q25, [\src, #24]            // bytes 24..39
        ldr             q27, [\src, #32]            // bytes 32..47

        sdot            z16.d, z18.h, z4.h[0]
        sdot            z17.d, z19.h, z4.h[0]

        tbl             v22.16b, {v21.16b}, v30.16b
        tbl             v23.16b, {v21.16b}, v31.16b
.ifc \type, prep
        movi            v1.2d, #0
        movi            v3.2d, #0
.else
        mov             v1.16b, v6.16b
        mov             v3.16b, v6.16b
.endif
        add             \src, \src, \s_strd         // all loads of this row are done
        sdot            z1.d, z22.h, z4.h[0]
        sdot            z3.d, z23.h, z4.h[0]

        tbl             v24.16b, {v25.16b}, v30.16b
        tbl             v25.16b, {v25.16b}, v31.16b
.ifc \type, prep
        movi            v22.2d, #0
        movi            v23.2d, #0
.else
        mov             v22.16b, v6.16b
        mov             v23.16b, v6.16b
.endif
        sdot            z22.d, z24.h, z4.h[0]
        sdot            z23.d, z25.h, z4.h[0]

        // Second half of the filter (coefficient group 1) for all sums.
        tbl             v20.16b, {v21.16b}, v30.16b
        tbl             v21.16b, {v21.16b}, v31.16b
        sdot            z0.d, z18.h, z4.h[1]
        sdot            z2.d, z19.h, z4.h[1]
        tbl             v26.16b, {v27.16b}, v30.16b
        tbl             v27.16b, {v27.16b}, v31.16b
        sdot            z16.d, z20.h, z4.h[1]
        sdot            z17.d, z21.h, z4.h[1]

        sdot            z1.d, z24.h, z4.h[1]
        sdot            z3.d, z25.h, z4.h[1]

        sdot            z22.d, z26.h, z4.h[1]
        sdot            z23.d, z27.h, z4.h[1]

        subs            \h, \h, #1                  // flags are consumed by `b.gt 16b` below
        uzp1            v0.4s, v0.4s, v2.4s
        uzp1            v2.4s, v16.4s, v17.4s
        uzp1            v1.4s, v1.4s, v3.4s
        uzp1            v3.4s, v22.4s, v23.4s
.ifc \type, prep
        srshl           v0.4s, v0.4s, v5.4s
        srshl           v2.4s, v2.4s, v5.4s
        srshl           v1.4s, v1.4s, v5.4s
        srshl           v3.4s, v3.4s, v5.4s
        uzp1            v0.8h, v0.8h, v2.8h
        uzp1            v1.8h, v1.8h, v3.8h
        sub             z0.h, z0.h, #PREP_BIAS
        sub             z1.h, z1.h, #PREP_BIAS
        stp             q0, q1, [\dst], #32
.else   // put
        sqshrun         v0.4h, v0.4s, #6
        sqshrun2        v0.8h, v2.4s, #6
        sqshrun         v1.4h, v1.4s, #6
        sqshrun2        v1.8h, v3.4s, #6
        umin            v0.8h, v0.8h, v5.8h
        umin            v1.8h, v1.8h, v5.8h
        st1             {v0.16b, v1.16b}, [\dst], \d_strd
.endif
        b.gt            16b
        ret

        .align JUMP_ALIGN
// 32-wide and larger: same 16-sample kernel as the 16xN path, run in an
// inner loop across the row (w8 counts the remaining samples) inside an
// outer loop over rows. The strides are reduced by the row length in bytes
// (\w * 2) up front, so that adding them after the inner loop lands on the
// start of the next row. Only put adjusts \d_strd; prep writes \dst
// contiguously.
320:    // H - 32xN+
640:
1280:
        AARCH64_VALID_JUMP_TARGET
.ifc \type, put
        sub             \d_strd, \d_strd, \w, uxtw #1
.endif
        sub             \s_strd, \s_strd, \w, uxtw #1
        mov             w8, \w

        .align LOOP_ALIGN
32:
        ldp             q17, q21, [\src]
        ldur            q19, [\src, #8]

.ifc \type, prep
        movi            v0.2d, #0
        movi            v2.2d, #0
.else
        mov             v0.16b, v6.16b
        mov             v2.16b, v6.16b
.endif
        tbl             v16.16b, {v17.16b}, v30.16b
        tbl             v17.16b, {v17.16b}, v31.16b
        sdot            z0.d, z16.h, z4.h[0]
        sdot            z2.d, z17.h, z4.h[0]

        tbl             v18.16b, {v19.16b}, v30.16b
        tbl             v19.16b, {v19.16b}, v31.16b
.ifc \type, prep
        movi            v16.2d, #0
        movi            v17.2d, #0
.else
        mov             v16.16b, v6.16b
        mov             v17.16b, v6.16b
.endif
        ldur            q25, [\src, #24]

        sdot            z16.d, z18.h, z4.h[0]
        sdot            z17.d, z19.h, z4.h[0]

        ldr             q27, [\src, #32]!           // pre-index writeback: \src += 32 bytes

        tbl             v22.16b, {v21.16b}, v30.16b
        tbl             v23.16b, {v21.16b}, v31.16b
.ifc \type, prep
        movi            v1.2d, #0
        movi            v3.2d, #0
.else
        mov             v1.16b, v6.16b
        mov             v3.16b, v6.16b
.endif
        sdot            z1.d, z22.h, z4.h[0]
        sdot            z3.d, z23.h, z4.h[0]

        tbl             v24.16b, {v25.16b}, v30.16b
        tbl             v25.16b, {v25.16b}, v31.16b
.ifc \type, prep
        movi            v22.2d, #0
        movi            v23.2d, #0
.else
        mov             v22.16b, v6.16b
        mov             v23.16b, v6.16b
.endif
        sdot            z22.d, z24.h, z4.h[0]
        sdot            z23.d, z25.h, z4.h[0]

        tbl             v20.16b, {v21.16b}, v30.16b
        tbl             v21.16b, {v21.16b}, v31.16b
        sdot            z0.d, z18.h, z4.h[1]
        sdot            z2.d, z19.h, z4.h[1]
        tbl             v26.16b, {v27.16b}, v30.16b
        tbl             v27.16b, {v27.16b}, v31.16b
        sdot            z16.d, z20.h, z4.h[1]
        sdot            z17.d, z21.h, z4.h[1]

        sdot            z1.d, z24.h, z4.h[1]
        sdot            z3.d, z25.h, z4.h[1]

        sdot            z22.d, z26.h, z4.h[1]
        sdot            z23.d, z27.h, z4.h[1]

        subs            w8, w8, #16                 // 16 samples done; flags used by `b.gt 32b`
        uzp1            v0.4s, v0.4s, v2.4s
        uzp1            v2.4s, v16.4s, v17.4s
        uzp1            v1.4s, v1.4s, v3.4s
        uzp1            v3.4s, v22.4s, v23.4s
.ifc \type, prep
        srshl           v0.4s, v0.4s, v5.4s
        srshl           v2.4s, v2.4s, v5.4s
        srshl           v1.4s, v1.4s, v5.4s
        srshl           v3.4s, v3.4s, v5.4s
        uzp1            v0.8h, v0.8h, v2.8h
        uzp1            v1.8h, v1.8h, v3.8h
        sub             z0.h, z0.h, #PREP_BIAS
        sub             z1.h, z1.h, #PREP_BIAS
.else   // put
        sqshrun         v0.4h, v0.4s, #6
        sqshrun2        v0.8h, v2.4s, #6
        sqshrun         v1.4h, v1.4s, #6
        sqshrun2        v1.8h, v3.4s, #6
        umin            v0.8h, v0.8h, v5.8h
        umin            v1.8h, v1.8h, v5.8h
.endif
        stp             q0, q1, [\dst], #32         // shared by prep and put
        b.gt            32b

        // End of row: step to the next row and reload the column counter.
        add             \src, \src, \s_strd
.ifc \type, put
        add             \dst, \dst, \d_strd
.endif
        subs            \h, \h, #1
        mov             w8, \w                      // mov leaves the flags from subs intact
        b.gt            32b
        ret
endfunc

// Offsets of the width-specific entry points relative to the table itself,
// widest first; read by the `ldrsw` in the dispatcher above. The `20`
// entry exists only for put.
jumptable \type\()_8tap_h_\isa\()_tbl
        .word 1280b - \type\()_8tap_h_\isa\()_tbl
        .word 640b  - \type\()_8tap_h_\isa\()_tbl
        .word 320b  - \type\()_8tap_h_\isa\()_tbl
        .word 160b  - \type\()_8tap_h_\isa\()_tbl
        .word 80b   - \type\()_8tap_h_\isa\()_tbl
        .word 40b   - \type\()_8tap_h_\isa\()_tbl
.ifc \type, put
        .word 20b   - \type\()_8tap_h_\isa\()_tbl
.endif
endjumptable
.endm


function prep_sve
Worker movrel x9, prep_tbl 1478*c0909341SAndroid Build Coastguard Worker mov w6, #19 1479*c0909341SAndroid Build Coastguard Worker ldrsw x8, [x9, x8, lsl #2] 1480*c0909341SAndroid Build Coastguard Worker sub w6, w6, w7, lsr #8 // 19 - bdmax / 256 1481*c0909341SAndroid Build Coastguard Worker add x9, x9, x8 1482*c0909341SAndroid Build Coastguard Worker movi v30.8h, #PREP_BIAS_NEG 1483*c0909341SAndroid Build Coastguard Worker dup v29.8h, w6 // 10b: 1 << 4, 12b: 1 << 2 1484*c0909341SAndroid Build Coastguard Worker br x9 1485*c0909341SAndroid Build Coastguard Worker 1486*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 1487*c0909341SAndroid Build Coastguard Worker40: // prep - 4xN 1488*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1489*c0909341SAndroid Build Coastguard Worker 1490*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 1491*c0909341SAndroid Build Coastguard Worker4: 1492*c0909341SAndroid Build Coastguard Worker ldr d0, [x1] 1493*c0909341SAndroid Build Coastguard Worker ldr d1, [x1, x2] 1494*c0909341SAndroid Build Coastguard Worker add x1, x1, x2, lsl #1 1495*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1496*c0909341SAndroid Build Coastguard Worker mad z0.h, p0/m, z29.h, z30.h 1497*c0909341SAndroid Build Coastguard Worker mad z1.h, p0/m, z29.h, z30.h 1498*c0909341SAndroid Build Coastguard Worker stp d0, d1, [x0], #16 1499*c0909341SAndroid Build Coastguard Worker b.gt 4b 1500*c0909341SAndroid Build Coastguard Worker ret 1501*c0909341SAndroid Build Coastguard Worker 1502*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 1503*c0909341SAndroid Build Coastguard Worker80: // prep - 8xN 1504*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1505*c0909341SAndroid Build Coastguard Worker 1506*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 1507*c0909341SAndroid Build Coastguard Worker8: 1508*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h}, [x1], x2 1509*c0909341SAndroid Build 
Coastguard Worker ld1 {v1.8h}, [x1], x2 1510*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1511*c0909341SAndroid Build Coastguard Worker mad z0.h, p0/m, z29.h, z30.h 1512*c0909341SAndroid Build Coastguard Worker mad z1.h, p0/m, z29.h, z30.h 1513*c0909341SAndroid Build Coastguard Worker stp q0, q1, [x0], #32 1514*c0909341SAndroid Build Coastguard Worker b.gt 8b 1515*c0909341SAndroid Build Coastguard Worker ret 1516*c0909341SAndroid Build Coastguard Worker 1517*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 1518*c0909341SAndroid Build Coastguard Worker160: // prep - 16xN 1519*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1520*c0909341SAndroid Build Coastguard Worker 1521*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 1522*c0909341SAndroid Build Coastguard Worker16: 1523*c0909341SAndroid Build Coastguard Worker ld1 {v0.8h, v1.8h}, [x1], x2 1524*c0909341SAndroid Build Coastguard Worker mad z0.h, p0/m, z29.h, z30.h 1525*c0909341SAndroid Build Coastguard Worker mad z1.h, p0/m, z29.h, z30.h 1526*c0909341SAndroid Build Coastguard Worker subs w4, w4, #2 1527*c0909341SAndroid Build Coastguard Worker ld1 {v2.8h, v3.8h}, [x1], x2 1528*c0909341SAndroid Build Coastguard Worker mad z2.h, p0/m, z29.h, z30.h 1529*c0909341SAndroid Build Coastguard Worker mad z3.h, p0/m, z29.h, z30.h 1530*c0909341SAndroid Build Coastguard Worker stp q0, q1, [x0] 1531*c0909341SAndroid Build Coastguard Worker stp q2, q3, [x0, #32] 1532*c0909341SAndroid Build Coastguard Worker add x0, x0, #64 1533*c0909341SAndroid Build Coastguard Worker b.gt 16b 1534*c0909341SAndroid Build Coastguard Worker ret 1535*c0909341SAndroid Build Coastguard Worker 1536*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 1537*c0909341SAndroid Build Coastguard Worker320: // prep - 32xN 1538*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1539*c0909341SAndroid Build Coastguard Worker 1540*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 
1541*c0909341SAndroid Build Coastguard Worker32: 1542*c0909341SAndroid Build Coastguard Worker ldp q0, q1, [x1] 1543*c0909341SAndroid Build Coastguard Worker mad z0.h, p0/m, z29.h, z30.h 1544*c0909341SAndroid Build Coastguard Worker mad z1.h, p0/m, z29.h, z30.h 1545*c0909341SAndroid Build Coastguard Worker ldp q2, q3, [x1, #32] 1546*c0909341SAndroid Build Coastguard Worker subs w4, w4, #1 1547*c0909341SAndroid Build Coastguard Worker mad z2.h, p0/m, z29.h, z30.h 1548*c0909341SAndroid Build Coastguard Worker mad z3.h, p0/m, z29.h, z30.h 1549*c0909341SAndroid Build Coastguard Worker add x1, x1, x2 1550*c0909341SAndroid Build Coastguard Worker stp q0, q1, [x0] 1551*c0909341SAndroid Build Coastguard Worker stp q2, q3, [x0, #32] 1552*c0909341SAndroid Build Coastguard Worker add x0, x0, #64 1553*c0909341SAndroid Build Coastguard Worker b.gt 32b 1554*c0909341SAndroid Build Coastguard Worker ret 1555*c0909341SAndroid Build Coastguard Worker 1556*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 1557*c0909341SAndroid Build Coastguard Worker640: // prep - 64xN 1558*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1559*c0909341SAndroid Build Coastguard Worker 1560*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 1561*c0909341SAndroid Build Coastguard Worker64: 1562*c0909341SAndroid Build Coastguard Worker ldp q0, q1, [x1] 1563*c0909341SAndroid Build Coastguard Worker mad z0.h, p0/m, z29.h, z30.h 1564*c0909341SAndroid Build Coastguard Worker mad z1.h, p0/m, z29.h, z30.h 1565*c0909341SAndroid Build Coastguard Worker ldp q2, q3, [x1, #32] 1566*c0909341SAndroid Build Coastguard Worker mad z2.h, p0/m, z29.h, z30.h 1567*c0909341SAndroid Build Coastguard Worker mad z3.h, p0/m, z29.h, z30.h 1568*c0909341SAndroid Build Coastguard Worker ldp q4, q5, [x1, #64] 1569*c0909341SAndroid Build Coastguard Worker mad z4.h, p0/m, z29.h, z30.h 1570*c0909341SAndroid Build Coastguard Worker mad z5.h, p0/m, z29.h, z30.h 1571*c0909341SAndroid Build Coastguard 
Worker ldp q6, q7, [x1, #96] 1572*c0909341SAndroid Build Coastguard Worker add x1, x1, x2 1573*c0909341SAndroid Build Coastguard Worker subs w4, w4, #1 1574*c0909341SAndroid Build Coastguard Worker mad z6.h, p0/m, z29.h, z30.h 1575*c0909341SAndroid Build Coastguard Worker mad z7.h, p0/m, z29.h, z30.h 1576*c0909341SAndroid Build Coastguard Worker stp q0, q1, [x0] 1577*c0909341SAndroid Build Coastguard Worker stp q2, q3, [x0, #32] 1578*c0909341SAndroid Build Coastguard Worker stp q4, q5, [x0, #64] 1579*c0909341SAndroid Build Coastguard Worker stp q6, q7, [x0, #96] 1580*c0909341SAndroid Build Coastguard Worker add x0, x0, #128 1581*c0909341SAndroid Build Coastguard Worker b.gt 64b 1582*c0909341SAndroid Build Coastguard Worker ret 1583*c0909341SAndroid Build Coastguard Worker 1584*c0909341SAndroid Build Coastguard Worker .align JUMP_ALIGN 1585*c0909341SAndroid Build Coastguard Worker1280: // prep - 128xN 1586*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1587*c0909341SAndroid Build Coastguard Worker 1588*c0909341SAndroid Build Coastguard Worker .align LOOP_ALIGN 1589*c0909341SAndroid Build Coastguard Worker128: 1590*c0909341SAndroid Build Coastguard Worker ldp q0, q1, [x1] 1591*c0909341SAndroid Build Coastguard Worker mad z0.h, p0/m, z29.h, z30.h 1592*c0909341SAndroid Build Coastguard Worker mad z1.h, p0/m, z29.h, z30.h 1593*c0909341SAndroid Build Coastguard Worker ldp q2, q3, [x1, #32] 1594*c0909341SAndroid Build Coastguard Worker mad z2.h, p0/m, z29.h, z30.h 1595*c0909341SAndroid Build Coastguard Worker mad z3.h, p0/m, z29.h, z30.h 1596*c0909341SAndroid Build Coastguard Worker ldp q4, q5, [x1, #64] 1597*c0909341SAndroid Build Coastguard Worker mad z4.h, p0/m, z29.h, z30.h 1598*c0909341SAndroid Build Coastguard Worker mad z5.h, p0/m, z29.h, z30.h 1599*c0909341SAndroid Build Coastguard Worker ldp q6, q7, [x1, #96] 1600*c0909341SAndroid Build Coastguard Worker mad z6.h, p0/m, z29.h, z30.h 1601*c0909341SAndroid Build Coastguard Worker mad z7.h, p0/m, 
z29.h, z30.h 1602*c0909341SAndroid Build Coastguard Worker ldp q16, q17, [x1, #128] 1603*c0909341SAndroid Build Coastguard Worker mad z16.h, p0/m, z29.h, z30.h 1604*c0909341SAndroid Build Coastguard Worker mad z17.h, p0/m, z29.h, z30.h 1605*c0909341SAndroid Build Coastguard Worker ldp q18, q19, [x1, #160] 1606*c0909341SAndroid Build Coastguard Worker mad z18.h, p0/m, z29.h, z30.h 1607*c0909341SAndroid Build Coastguard Worker mad z19.h, p0/m, z29.h, z30.h 1608*c0909341SAndroid Build Coastguard Worker ldp q20, q21, [x1, #192] 1609*c0909341SAndroid Build Coastguard Worker mad z20.h, p0/m, z29.h, z30.h 1610*c0909341SAndroid Build Coastguard Worker mad z21.h, p0/m, z29.h, z30.h 1611*c0909341SAndroid Build Coastguard Worker ldp q22, q23, [x1, #224] 1612*c0909341SAndroid Build Coastguard Worker add x1, x1, x2 1613*c0909341SAndroid Build Coastguard Worker mad z22.h, p0/m, z29.h, z30.h 1614*c0909341SAndroid Build Coastguard Worker mad z23.h, p0/m, z29.h, z30.h 1615*c0909341SAndroid Build Coastguard Worker subs w4, w4, #1 1616*c0909341SAndroid Build Coastguard Worker stp q0, q1, [x0] 1617*c0909341SAndroid Build Coastguard Worker stp q2, q3, [x0, #32] 1618*c0909341SAndroid Build Coastguard Worker stp q4, q5, [x0, #64] 1619*c0909341SAndroid Build Coastguard Worker stp q6, q7, [x0, #96] 1620*c0909341SAndroid Build Coastguard Worker stp q16, q17, [x0, #128] 1621*c0909341SAndroid Build Coastguard Worker stp q18, q19, [x0, #160] 1622*c0909341SAndroid Build Coastguard Worker stp q20, q21, [x0, #192] 1623*c0909341SAndroid Build Coastguard Worker stp q22, q23, [x0, #224] 1624*c0909341SAndroid Build Coastguard Worker add x0, x0, #256 1625*c0909341SAndroid Build Coastguard Worker b.gt 128b 1626*c0909341SAndroid Build Coastguard Worker ret 1627*c0909341SAndroid Build Coastguard Workerendfunc 1628*c0909341SAndroid Build Coastguard Worker 1629*c0909341SAndroid Build Coastguard Workerjumptable prep_tbl 1630*c0909341SAndroid Build Coastguard Worker .word 1280b - prep_tbl 
// Remaining entries of prep_tbl (the 128xN entry, `1280b`, precedes these).
// Each entry is the 32-bit offset of a width-specific entry label relative to
// prep_tbl itself; the dispatcher loads it with `ldrsw`, adds it to the table
// address and branches there. Entries run from the widest block to the
// narrowest: 640 = 64xN, 320 = 32xN, 160 = 16xN, 80 = 8xN, 40 = 4xN.
// NOTE(review): the table index (x8) is computed outside the visible part of
// the function; confirm it matches this widest-first ordering.
        .word 640b - prep_tbl
        .word 320b - prep_tbl
        .word 160b - prep_tbl
        .word 80b - prep_tbl
        .word 40b - prep_tbl
endjumptable


// Instantiate the 8-tap filter macro for `prep`. The two comment lines below
// name each macro argument and the register bound to it, in argument order.
// xmx/ldst share x5 and xmy/lsrc share x6.
// NOTE(review): the macro body is not visible here; presumably it emits the
// prep 8-tap entry points for SVE2 - confirm against its definition.
// dst(x0), d_strd(x9), src(x1), s_strd(x2), w(w3), h(w4), mx(w5), my(w6), bdmax(w7)
// xmx(x5), xmy(x6), ldst(x5), lsrc(x6), wd_strd(w9), ws_strd(w2)
filter_8tap_fn prep, sve2, x0, x9, x1, x2, w3, w4, w5, w6, w7, x5, x6, x5, x6, w9, w2

// Same macro instantiated for `put`, which uses a different register
// assignment (every argument after dst is bound to a different register).
// xmx/ldst share x6 and xmy/lsrc share x7.
// dst(x0), d_strd(x1), src(x2), s_strd(x3), w(w4), h(w5), mx(w6), my(w7), bdmax(w8)
// xmx(x6), xmy(x7), ldst(x6), lsrc(x7), wd_strd(w1), ws_strd(w3)
filter_8tap_fn put, sve2, x0, x1, x2, x3, w4, w5, w6, w7, w8, x6, x7, x6, x7, w1, w3

// Counterparts of the ENABLE_SVE / ENABLE_SVE2 directives issued at the top of
// the HAVE_SVE2 section, in reverse order.
DISABLE_SVE2
DISABLE_SVE
#endif // HAVE_SVE2