/*
 * Copyright © 2018, VideoLAN and dav1d authors
 * Copyright © 2018, Janne Grunau
 * Copyright © 2018, Martin Storsjo
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "src/arm/asm.S"
#include "util.S"

// ---------------------------------------------------------------------------
// Per-16-pixel kernels used by bidir_fn.
//
// Each macro reads 16 int16 values from [x2] (called a below) and 16 from
// [x3] (called b below), post-incrementing both pointers by 32 bytes, and
// leaves 16 unsigned bytes in dst. t0-t3 are scratch vector registers.
// All intermediate arithmetic is done in 16-bit lanes.
// ---------------------------------------------------------------------------

// avg: dst = sat_u8((a + b + 16) >> 5)
.macro avg dst, t0, t1, t2, t3
        ld1             {\t0\().8h,\t1\().8h},   [x2],  32
        ld1             {\t2\().8h,\t3\().8h},   [x3],  32
        add             \t0\().8h,   \t0\().8h,   \t2\().8h
        add             \t1\().8h,   \t1\().8h,   \t3\().8h
        sqrshrun        \dst\().8b,  \t0\().8h,   #5
        sqrshrun2       \dst\().16b, \t1\().8h,   #5
.endm

// w_avg: dst = sat_u8((b + sqdmulh(b - a, v30) + 8) >> 4)
// bidir_fn preloads every lane of v30 with -(w6 << 11); sqdmulh keeps the
// high half of the doubled product, so the middle term equals
// ((a - b) * w6) >> 4.
.macro w_avg dst, t0, t1, t2, t3
        ld1             {\t0\().8h,\t1\().8h},   [x2],  32
        ld1             {\t2\().8h,\t3\().8h},   [x3],  32
        sub             \t0\().8h,   \t2\().8h,   \t0\().8h
        sub             \t1\().8h,   \t3\().8h,   \t1\().8h
        sqdmulh         \t0\().8h,   \t0\().8h,   v30.8h
        sqdmulh         \t1\().8h,   \t1\().8h,   v30.8h
        add             \t0\().8h,   \t2\().8h,   \t0\().8h
        add             \t1\().8h,   \t3\().8h,   \t1\().8h
        sqrshrun        \dst\().8b,  \t0\().8h,   #4
        sqrshrun2       \dst\().16b, \t1\().8h,   #4
.endm

// mask: same shape as w_avg, but with a per-pixel weight m read from [x6]
// (16 bytes, post-incremented). bidir_fn preloads v31 with the byte 254
// (that is -2 modulo 256); m * 254 widened with a left shift of 8 gives
// -(m << 9) modulo 2^16 in each 16-bit lane, so
//   dst = sat_u8((b + (((a - b) * m) >> 6) + 8) >> 4)
// Clobbers v28, v29 and v30 in addition to t0-t3.
.macro mask dst, t0, t1, t2, t3
        ld1             {v30.16b}, [x6],  16
        ld1             {\t0\().8h,\t1\().8h},   [x2],  32
        mul             v30.16b, v30.16b, v31.16b
        ld1             {\t2\().8h,\t3\().8h},   [x3],  32
        shll            v28.8h, v30.8b,  #8
        shll2           v29.8h, v30.16b, #8
        sub             \t0\().8h,   \t2\().8h,   \t0\().8h
        sub             \t1\().8h,   \t3\().8h,   \t1\().8h
        sqdmulh         \t0\().8h,   \t0\().8h,   v28.8h
        sqdmulh         \t1\().8h,   \t1\().8h,   v29.8h
        add             \t0\().8h,   \t2\().8h,   \t0\().8h
        add             \t1\().8h,   \t3\().8h,   \t1\().8h
        sqrshrun        \dst\().8b,  \t0\().8h,   #4
        sqrshrun2       \dst\().16b, \t1\().8h,   #4
.endm

// ---------------------------------------------------------------------------
// bidir_fn type
//
// Emits the exported function TYPE_8bpc_neon and its jump table TYPE_tbl,
// where TYPE is one of avg, w_avg or mask (the kernel macros above).
//
// Register use, as read from the code:
//   x0 = destination pointer        x1 = destination stride in bytes
//   x2 = first int16 input          x3 = second int16 input
//   w4 = width                      w5 = number of rows
//   w6 = weight (w_avg only)        x6 = mask byte pointer (mask only)
//
// Dispatch: clz(w4) - 24 indexes the six-entry table, whose entries are
// ordered from width 128 down to width 4, so w4 must be a power of two in
// that range. The inputs are consumed strictly sequentially; only the
// destination uses a stride.
//
// The first 16 output pixels are computed into v4 before the indirect
// branch, and every loop computes the next v4 just before branching back,
// so v4 is always ready on entry to a width-specific label.
//
// Row count: the loops count w5 down by the number of rows they store per
// iteration (4 for widths 8 and 16, 2 for widths 32 and 64, 1 for width
// 128). The width-4 path is fully unrolled and stores 4, 8 or 16 rows.
// NOTE(review): w5 is assumed to be a multiple of the per-iteration step
// (and 4, 8 or 16 for width 4) -- confirm against the callers.
//
// Clobbers: x4, x7, v0-v7, v16-v19, flags; w_avg also v30; mask also
// v28-v31.
// ---------------------------------------------------------------------------
.macro bidir_fn type
function \type\()_8bpc_neon, export=1
        clz             w4,  w4
.ifc \type, w_avg
        // v30 = -(weight << 11) in every 16-bit lane
        dup             v30.8h, w6
        neg             v30.8h, v30.8h
        shl             v30.8h, v30.8h, #11
.endif
.ifc \type, mask
        // v31 = -2 as a byte; multiplier applied to the mask values
        movi            v31.16b, #256-2
.endif
        movrel          x7,  \type\()_tbl
        sub             w4,  w4,  #24
        ldrsw           x4,  [x7, x4, lsl #2]
        \type           v4,  v0,  v1,  v2,  v3
        add             x7,  x7,  x4
        br              x7
40:
        // Width 4: one vector holds four rows. x0 walks even rows and x7
        // odd rows, both stepping by twice the stride.
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  x1
        lsl             x1,  x1,  #1
4:
        cmp             w5,  #4
        st1             {v4.s}[0],  [x0], x1
        st1             {v4.s}[1],  [x7], x1
        st1             {v4.s}[2],  [x0], x1
        st1             {v4.s}[3],  [x7], x1
        b.eq            0f
        \type           v5,  v0,  v1,  v2,  v3
        cmp             w5,  #8
        st1             {v5.s}[0],  [x0], x1
        st1             {v5.s}[1],  [x7], x1
        st1             {v5.s}[2],  [x0], x1
        st1             {v5.s}[3],  [x7], x1
        b.eq            0f
        // Neither 4 nor 8 rows: store eight more rows (16 in total).
        \type           v4,  v0,  v1,  v2,  v3
        st1             {v4.s}[0],  [x0], x1
        st1             {v4.s}[1],  [x7], x1
        \type           v5,  v0,  v1,  v2,  v3
        st1             {v4.s}[2],  [x0], x1
        st1             {v4.s}[3],  [x7], x1
        st1             {v5.s}[0],  [x0], x1
        st1             {v5.s}[1],  [x7], x1
        st1             {v5.s}[2],  [x0], x1
        st1             {v5.s}[3],  [x7], x1
        ret
80:
        // Width 8: one vector holds two rows; four rows per iteration.
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  x1
        lsl             x1,  x1,  #1
8:
        st1             {v4.8b},  [x0], x1
        \type           v5,  v0,  v1,  v2,  v3
        st1             {v4.d}[1],  [x7], x1
        st1             {v5.8b},  [x0], x1
        subs            w5,  w5,  #4
        st1             {v5.d}[1],  [x7], x1
        b.le            0f
        \type           v4,  v0,  v1,  v2,  v3
        b               8b
160:
        // Width 16: one vector per row; four rows per iteration, all
        // stored through x0 with the unmodified stride.
        AARCH64_VALID_JUMP_TARGET
16:
        \type           v5,  v0,  v1,  v2,  v3
        st1             {v4.16b}, [x0], x1
        \type           v6,  v0,  v1,  v2,  v3
        st1             {v5.16b}, [x0], x1
        \type           v7,  v0,  v1,  v2,  v3
        st1             {v6.16b}, [x0], x1
        subs            w5,  w5,  #4
        st1             {v7.16b}, [x0], x1
        b.le            0f
        \type           v4,  v0,  v1,  v2,  v3
        b               16b
320:
        // Width 32: two vectors per row; two rows per iteration.
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  x1
        lsl             x1,  x1,  #1
32:
        \type           v5,  v0,  v1,  v2,  v3
        \type           v6,  v0,  v1,  v2,  v3
        st1             {v4.16b,v5.16b}, [x0], x1
        \type           v7,  v0,  v1,  v2,  v3
        subs            w5,  w5,  #2
        st1             {v6.16b,v7.16b}, [x7], x1
        b.le            0f
        \type           v4,  v0,  v1,  v2,  v3
        b               32b
640:
        // Width 64: four vectors per row; two rows per iteration.
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  x1
        lsl             x1,  x1,  #1
64:
        \type           v5,  v0,  v1,  v2,  v3
        \type           v6,  v0,  v1,  v2,  v3
        \type           v7,  v0,  v1,  v2,  v3
        \type           v16, v0,  v1,  v2,  v3
        \type           v17, v0,  v1,  v2,  v3
        st1             {v4.16b,v5.16b,v6.16b,v7.16b}, [x0], x1
        \type           v18, v0,  v1,  v2,  v3
        \type           v19, v0,  v1,  v2,  v3
        subs            w5,  w5,  #2
        st1             {v16.16b,v17.16b,v18.16b,v19.16b}, [x7], x1
        b.le            0f
        \type           v4,  v0,  v1,  v2,  v3
        b               64b
1280:
        // Width 128: one row per iteration. x0 covers the left 64 bytes and
        // x7 the right 64 bytes of the same row; the stride is not doubled.
        AARCH64_VALID_JUMP_TARGET
        add             x7,  x0,  #64
128:
        \type           v5,  v0,  v1,  v2,  v3
        \type           v6,  v0,  v1,  v2,  v3
        \type           v7,  v0,  v1,  v2,  v3
        \type           v16, v0,  v1,  v2,  v3
        \type           v17, v0,  v1,  v2,  v3
        st1             {v4.16b,v5.16b,v6.16b,v7.16b}, [x0], x1
        \type           v18, v0,  v1,  v2,  v3
        \type           v19, v0,  v1,  v2,  v3
        subs            w5,  w5,  #1
        st1             {v16.16b,v17.16b,v18.16b,v19.16b}, [x7], x1
        b.le            0f
        \type           v4,  v0,  v1,  v2,  v3
        b               128b
0:
        ret
endfunc

// 32-bit offsets relative to the table start, ordered by clz(width) - 24:
// index 0 is width 128, index 5 is width 4.
jumptable \type\()_tbl
        .word 1280b - \type\()_tbl
        .word 640b  - \type\()_tbl
        .word 320b  - \type\()_tbl
        .word 160b  - \type\()_tbl
        .word 80b   - \type\()_tbl
        .word 40b   - \type\()_tbl
endjumptable
.endm

bidir_fn avg
bidir_fn w_avg
bidir_fn mask


// ---------------------------------------------------------------------------
// w_mask_fn type
//
// Emits the exported function w_mask_TYPE_8bpc_neon and its jump table
// w_mask_TYPE_tbl, for TYPE equal to 444, 422 or 420. The function derives a
// per-pixel weight from the difference of two int16 inputs, blends the
// inputs with it, and also writes the weights out as a byte mask.
//
// Register use, as read from the code:
//   x0 = destination pointer        x1 = destination stride in bytes
//   x2 = tmp1 (int16, sequential)   x3 = tmp2 (int16, sequential)
//   w4 = width                      w5 = number of rows
//   x6 = mask output pointer        w7 = value folded into the mask
//                                        rounding constant (422/420 only)
//
// Per pixel, with d = abs(tmp1 - tmp2):
//   n   = sat_sub_u16(6903, d) >> 8
//   out = sat_u8((tmp1 + ((n * (tmp2 - tmp1)) >> 6) + 8) >> 4)
// The multiply is done as sqdmulh(n << 9, tmp2 - tmp1).
//
// Mask bytes written, with m = 64 - n:
//   444: m for every pixel
//   422: (m0 + m1 + 1 - w7) >> 1 for each horizontal pair
//   420: (m0 + m1 + m2 + m3 + 2 - w7) >> 2 for each 2x2 group
//
// Dispatch is the same as in bidir_fn (clz(w4) - 24 into a six-entry table),
// but all widths of 16 and above share one generic loop. Every path handles
// at least two rows per iteration (four for width 4).
//
// Clobbers: x7-x12, v0-v7, v16-v27, flags.
// ---------------------------------------------------------------------------
.macro w_mask_fn type
function w_mask_\type\()_8bpc_neon, export=1
        clz             w8,  w4
        movrel          x9,  w_mask_\type\()_tbl
        sub             w8,  w8,  #24
        ldrsw           x8,  [x9,  x8,  lsl #2]
        add             x9,  x9,  x8
        mov             w10, #6903
        dup             v0.8h,   w10
.if \type == 444
        movi            v1.16b,  #64                    // m = 64 - n
.elseif \type == 422
        dup             v2.8b,   w7
        movi            v3.8b,   #129
        sub             v3.8b,   v3.8b,   v2.8b         // 129 - w7
.elseif \type == 420
        dup             v2.8h,   w7
        movi            v3.8h,   #1, lsl #8
        sub             v3.8h,   v3.8h,   v2.8h         // 256 - w7
.endif
        // x0 walks even rows, x12 odd rows, both by twice the stride.
        add             x12,  x0,  x1
        lsl             x1,   x1,  #1
        br              x9
40:
        AARCH64_VALID_JUMP_TARGET
4:
        ld1             {v4.8h,   v5.8h},   [x2],  #32  // tmp1 (four rows at once)
        ld1             {v6.8h,   v7.8h},   [x3],  #32  // tmp2 (four rows at once)
        subs            w5,  w5,  #4
        sub             v16.8h,  v6.8h,   v4.8h
        sub             v17.8h,  v7.8h,   v5.8h
        sabd            v18.8h,  v4.8h,   v6.8h
        sabd            v19.8h,  v5.8h,   v7.8h
        uqsub           v18.8h,  v0.8h,   v18.8h
        uqsub           v19.8h,  v0.8h,   v19.8h
        ushr            v18.8h,  v18.8h,  #8
        ushr            v19.8h,  v19.8h,  #8
        shl             v20.8h,  v18.8h,  #9
        shl             v21.8h,  v19.8h,  #9
        sqdmulh         v20.8h,  v20.8h,  v16.8h
        sqdmulh         v21.8h,  v21.8h,  v17.8h
        add             v20.8h,  v20.8h,  v4.8h
        add             v21.8h,  v21.8h,  v5.8h
        sqrshrun        v22.8b,  v20.8h,  #4
        sqrshrun        v23.8b,  v21.8h,  #4
.if \type == 444
        uzp1            v18.16b,  v18.16b, v19.16b      // Same as xtn, xtn2
        sub             v18.16b,  v1.16b,  v18.16b
        st1             {v18.16b}, [x6],  #16
.elseif \type == 422
        addp            v18.8h,  v18.8h,  v19.8h        // horizontal pair sums
        xtn             v18.8b,  v18.8h
        uhsub           v18.8b,  v3.8b,   v18.8b
        st1             {v18.8b},  [x6],  #8
.elseif \type == 420
        trn1            v24.2d,  v18.2d,  v19.2d        // rows 0 and 2
        trn2            v25.2d,  v18.2d,  v19.2d        // rows 1 and 3
        add             v24.8h,  v24.8h,  v25.8h        // vertical pair sums
        addp            v18.8h,  v24.8h,  v24.8h        // horizontal pair sums
        sub             v18.4h,  v3.4h,   v18.4h
        rshrn           v18.8b,  v18.8h,  #2
        str             s18,         [x6],  #4
.endif
        st1             {v22.s}[0],  [x0],  x1
        st1             {v22.s}[1],  [x12], x1
        st1             {v23.s}[0],  [x0],  x1
        st1             {v23.s}[1],  [x12], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        // Two rows of eight pixels per iteration.
        ld1             {v4.8h,   v5.8h},   [x2],  #32
        ld1             {v6.8h,   v7.8h},   [x3],  #32
        subs            w5,  w5,  #2
        sub             v16.8h,  v6.8h,   v4.8h
        sub             v17.8h,  v7.8h,   v5.8h
        sabd            v18.8h,  v4.8h,   v6.8h
        sabd            v19.8h,  v5.8h,   v7.8h
        uqsub           v18.8h,  v0.8h,   v18.8h
        uqsub           v19.8h,  v0.8h,   v19.8h
        ushr            v18.8h,  v18.8h,  #8
        ushr            v19.8h,  v19.8h,  #8
        shl             v20.8h,  v18.8h,  #9
        shl             v21.8h,  v19.8h,  #9
        sqdmulh         v20.8h,  v20.8h,  v16.8h
        sqdmulh         v21.8h,  v21.8h,  v17.8h
        add             v20.8h,  v20.8h,  v4.8h
        add             v21.8h,  v21.8h,  v5.8h
        sqrshrun        v22.8b,  v20.8h,  #4
        sqrshrun        v23.8b,  v21.8h,  #4
.if \type == 444
        uzp1            v18.16b, v18.16b, v19.16b       // Same as xtn, xtn2
        sub             v18.16b, v1.16b,  v18.16b
        st1             {v18.16b}, [x6],  #16
.elseif \type == 422
        addp            v18.8h,  v18.8h,  v19.8h
        xtn             v18.8b,  v18.8h
        uhsub           v18.8b,  v3.8b,   v18.8b
        st1             {v18.8b},  [x6],  #8
.elseif \type == 420
        add             v18.8h,  v18.8h,  v19.8h        // vertical pair sums
        addp            v18.8h,  v18.8h,  v18.8h        // horizontal pair sums
        sub             v18.4h,  v3.4h,   v18.4h
        rshrn           v18.8b,  v18.8h,  #2
        str             s18,         [x6],  #4
.endif
        st1             {v22.8b},  [x0],  x1
        st1             {v23.8b},  [x12], x1
        b.gt            8b
        ret
1280:
640:
320:
160:
        // Generic path for widths of 16 and above: 16 columns of two
        // adjacent rows per inner iteration.
        AARCH64_VALID_JUMP_TARGET
        mov             w11, w4                         // x11 = zero-extended width
        sub             x1,  x1,  w4,  uxtw             // 2 * stride - width
.if \type == 444
        add             x10, x6,  w4,  uxtw             // mask, second row
.elseif \type == 422
        add             x10, x6,  x11, lsr #1           // mask, second row
.endif
        add             x9,  x3,  w4,  uxtw #1          // tmp2, second row
        add             x7,  x2,  w4,  uxtw #1          // tmp1, second row
161:
        mov             w8,  w4                         // columns left in this row pair
16:
        ld1             {v4.8h,   v5.8h},   [x2],  #32
        ld1             {v6.8h,   v7.8h},   [x3],  #32
        ld1             {v16.8h,  v17.8h},  [x7],  #32
        ld1             {v18.8h,  v19.8h},  [x9],  #32
        subs            w8,  w8,  #16
        sub             v6.8h,   v6.8h,   v4.8h
        sub             v7.8h,   v7.8h,   v5.8h
        sub             v18.8h,  v18.8h,  v16.8h
        sub             v19.8h,  v19.8h,  v17.8h
        abs             v20.8h,  v6.8h
        abs             v21.8h,  v7.8h
        abs             v22.8h,  v18.8h
        abs             v23.8h,  v19.8h
        uqsub           v20.8h,  v0.8h,   v20.8h
        uqsub           v21.8h,  v0.8h,   v21.8h
        uqsub           v22.8h,  v0.8h,   v22.8h
        uqsub           v23.8h,  v0.8h,   v23.8h
        ushr            v20.8h,  v20.8h,  #8
        ushr            v21.8h,  v21.8h,  #8
        ushr            v22.8h,  v22.8h,  #8
        ushr            v23.8h,  v23.8h,  #8
        shl             v24.8h,  v20.8h,  #9
        shl             v25.8h,  v21.8h,  #9
        shl             v26.8h,  v22.8h,  #9
        shl             v27.8h,  v23.8h,  #9
        sqdmulh         v24.8h,  v24.8h,  v6.8h
        sqdmulh         v25.8h,  v25.8h,  v7.8h
        sqdmulh         v26.8h,  v26.8h,  v18.8h
        sqdmulh         v27.8h,  v27.8h,  v19.8h
        add             v24.8h,  v24.8h,  v4.8h
        add             v25.8h,  v25.8h,  v5.8h
        add             v26.8h,  v26.8h,  v16.8h
        add             v27.8h,  v27.8h,  v17.8h
        sqrshrun        v24.8b,  v24.8h,  #4
        sqrshrun        v25.8b,  v25.8h,  #4
        sqrshrun        v26.8b,  v26.8h,  #4
        sqrshrun        v27.8b,  v27.8h,  #4
.if \type == 444
        uzp1            v20.16b, v20.16b, v21.16b       // Same as xtn, xtn2
        uzp1            v21.16b, v22.16b, v23.16b       // Ditto
        sub             v20.16b, v1.16b,  v20.16b
        sub             v21.16b, v1.16b,  v21.16b
        st1             {v20.16b}, [x6],  #16
        st1             {v21.16b}, [x10], #16
.elseif \type == 422
        addp            v20.8h,  v20.8h,  v21.8h
        addp            v21.8h,  v22.8h,  v23.8h
        xtn             v20.8b,  v20.8h
        xtn             v21.8b,  v21.8h
        uhsub           v20.8b,  v3.8b,   v20.8b
        uhsub           v21.8b,  v3.8b,   v21.8b
        st1             {v20.8b},  [x6],  #8
        st1             {v21.8b},  [x10], #8
.elseif \type == 420
        add             v20.8h,  v20.8h,  v22.8h        // vertical pair sums
        add             v21.8h,  v21.8h,  v23.8h
        addp            v20.8h,  v20.8h,  v21.8h        // horizontal pair sums
        sub             v20.8h,  v3.8h,   v20.8h
        rshrn           v20.8b,  v20.8h,  #2
        st1             {v20.8b},  [x6],  #8
.endif
        st1             {v24.8b,  v25.8b},  [x0],  #16
        st1             {v26.8b,  v27.8b},  [x12], #16
        b.gt            16b
        // The flags from this subs survive the plain add instructions below
        // and are consumed by the final b.gt.
        subs            w5,  w5,  #2
        // Each pointer has just finished its own row; skip the row that the
        // paired pointer handled.
        add             x2,  x2,  w4,  uxtw #1
        add             x3,  x3,  w4,  uxtw #1
        add             x7,  x7,  w4,  uxtw #1
        add             x9,  x9,  w4,  uxtw #1
.if \type == 444
        add             x6,  x6,  w4,  uxtw
        add             x10, x10, w4,  uxtw
.elseif \type == 422
        add             x6,  x6,  x11, lsr #1
        add             x10, x10, x11, lsr #1
.endif
        add             x0,  x0,  x1
        add             x12, x12, x1
        b.gt            161b
        ret
endfunc

// 32-bit offsets relative to the table start, ordered by clz(width) - 24.
// Widths 128, 64, 32 and 16 all resolve to the same generic entry point.
jumptable w_mask_\type\()_tbl
        .word 1280b - w_mask_\type\()_tbl
        .word 640b  - w_mask_\type\()_tbl
        .word 320b  - w_mask_\type\()_tbl
        .word 160b  - w_mask_\type\()_tbl
        .word 80b   - w_mask_\type\()_tbl
        .word 40b   - w_mask_\type\()_tbl
endjumptable
.endm

w_mask_fn 444
w_mask_fn 422
w_mask_fn 420


// blend_8bpc_neon: per-pixel masked blend of a temporary buffer into dst.
//
// Register roles, as used by the code below:
//   x0  dst pointer (read and written in place)
//   x1  dst stride in bytes (doubled on entry; x8 = dst + stride walks the
//       second row of each row pair)
//   x2  tmp pixels, read contiguously (w bytes per row)
//   w3  width; clz(w3)-26 indexes blend_tbl, whose entries are ordered
//       32, 16, 8, 4 (presumably w is always one of these - confirm against
//       the C caller)
//   w4  height; every loop handles two rows and runs while w4 > 0
//   x5  mask, read contiguously, one byte per pixel
//
// Each output byte is (tmp*m + dst*(64 - m) + 32) >> 6: umull/umlal build the
// 16-bit sum and rshrn #6 performs the rounding narrowing shift.
// NOTE(review): the narrowing does not saturate, so this relies on m <= 64.
// Scratch: x3, x6, x8, v0-v7, v16-v31 (v8-v15 are not touched).
function blend_8bpc_neon, export=1
        movrel          x6, blend_tbl
        clz             w3, w3
        sub             w3, w3, #26
        ldrsw           x3, [x6, x3, lsl #2]
        add             x6, x6, x3
        movi            v4.16b, #64             // v4 = 64 in every byte lane
        add             x8, x0, x1              // x8 = second row of dst
        lsl             x1, x1, #1              // step two rows per iteration
        br              x6
40:
        AARCH64_VALID_JUMP_TARGET
4:
        // w == 4: two rows of four pixels packed into one 8-byte vector.
        ld1             {v2.8b}, [x5], #8       // mask, rows 0 and 1
        ldr             d1, [x2], #8            // tmp, rows 0 and 1
        ldr             s0, [x0]                // dst row 0
        subs            w4, w4, #2
        ld1             {v0.s}[1], [x8]         // dst row 1
        sub             v3.8b, v4.8b, v2.8b     // 64 - m
        umull           v5.8h, v1.8b, v2.8b
        umlal           v5.8h, v0.8b, v3.8b
        rshrn           v6.8b, v5.8h, #6
        st1             {v6.s}[0], [x0], x1
        st1             {v6.s}[1], [x8], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        // w == 8: row 0 in the low half, row 1 in the high half.
        ld1             {v2.16b}, [x5], #16
        ld1             {v1.16b}, [x2], #16
        ldr             d0, [x0]
        ld1             {v0.d}[1], [x8]
        sub             v3.16b, v4.16b, v2.16b
        subs            w4, w4, #2
        umull           v5.8h, v1.8b, v2.8b
        umlal           v5.8h, v0.8b, v3.8b
        umull2          v6.8h, v1.16b, v2.16b
        umlal2          v6.8h, v0.16b, v3.16b
        rshrn           v7.8b, v5.8h, #6
        rshrn           v16.8b, v6.8h, #6
        st1             {v7.8b}, [x0], x1
        st1             {v16.8b}, [x8], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
16:
        // w == 16: one full vector per row (v1/v5/v0 row 0, v2/v6/v3 row 1).
        ld1             {v1.16b, v2.16b}, [x5], #32
        ld1             {v5.16b, v6.16b}, [x2], #32
        ld1             {v0.16b}, [x0]
        subs            w4, w4, #2
        sub             v7.16b, v4.16b, v1.16b
        sub             v20.16b, v4.16b, v2.16b
        ld1             {v3.16b}, [x8]
        umull           v16.8h, v5.8b, v1.8b
        umlal           v16.8h, v0.8b, v7.8b
        umull2          v17.8h, v5.16b, v1.16b
        umlal2          v17.8h, v0.16b, v7.16b
        umull           v21.8h, v6.8b, v2.8b
        umlal           v21.8h, v3.8b, v20.8b
        umull2          v22.8h, v6.16b, v2.16b
        umlal2          v22.8h, v3.16b, v20.16b
        rshrn           v18.8b, v16.8h, #6
        rshrn2          v18.16b, v17.8h, #6
        rshrn           v19.8b, v21.8h, #6
        rshrn2          v19.16b, v22.8h, #6
        st1             {v18.16b}, [x0], x1
        st1             {v19.16b}, [x8], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
32:
        // w == 32: two vectors per row. v1 and v21 are recycled as
        // accumulators once their mask/dst contents have been consumed.
        ld1             {v0.16b, v1.16b, v2.16b, v3.16b}, [x5], #64
        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x2], #64
        ld1             {v20.16b, v21.16b}, [x0]
        subs            w4, w4, #2
        ld1             {v22.16b, v23.16b}, [x8]
        sub             v5.16b, v4.16b, v0.16b
        sub             v6.16b, v4.16b, v1.16b
        sub             v30.16b, v4.16b, v2.16b
        sub             v31.16b, v4.16b, v3.16b
        umull           v24.8h, v16.8b, v0.8b
        umlal           v24.8h, v20.8b, v5.8b
        umull2          v26.8h, v16.16b, v0.16b
        umlal2          v26.8h, v20.16b, v5.16b
        umull           v28.8h, v17.8b, v1.8b
        umlal           v28.8h, v21.8b, v6.8b
        umull2          v7.8h, v17.16b, v1.16b
        umlal2          v7.8h, v21.16b, v6.16b
        umull           v27.8h, v18.8b, v2.8b
        umlal           v27.8h, v22.8b, v30.8b
        umull2          v1.8h, v18.16b, v2.16b
        umlal2          v1.8h, v22.16b, v30.16b
        umull           v29.8h, v19.8b, v3.8b
        umlal           v29.8h, v23.8b, v31.8b
        umull2          v21.8h, v19.16b, v3.16b
        umlal2          v21.8h, v23.16b, v31.16b
        rshrn           v24.8b, v24.8h, #6
        rshrn2          v24.16b, v26.8h, #6
        rshrn           v25.8b, v28.8h, #6
        rshrn2          v25.16b, v7.8h, #6
        rshrn           v27.8b, v27.8h, #6
        rshrn2          v27.16b, v1.8h, #6
        rshrn           v28.8b, v29.8h, #6
        rshrn2          v28.16b, v21.8h, #6
        st1             {v24.16b, v25.16b}, [x0], x1
        st1             {v27.16b, v28.16b}, [x8], x1
        b.gt            32b
        ret
endfunc

// Offsets of the per-width entry points, indexed by clz(w)-26.
jumptable blend_tbl
        .word           320b - blend_tbl
        .word           160b - blend_tbl
        .word           80b - blend_tbl
        .word           40b - blend_tbl
endjumptable

// blend_h_8bpc_neon: blend with one mask value per row, taken from
// obmc_masks.
//
// Register roles, as used by the code below:
//   x0  dst pointer (read and written in place)
//   x1  dst stride in bytes (doubled on entry; x8 = dst + stride)
//   x2  tmp pixels, read contiguously (w bytes per row)
//   w3  width; clz(w3)-24 indexes blend_h_tbl, whose entries are ordered
//       128, 64, 32, 16, 8, 4, 2
//   w4  height h on entry. The mask pointer is set to obmc_masks + h and the
//       row counter is reduced to h - (h >> 2), so only that many leading
//       rows are blended.
//
// Per pixel: (tmp*m + dst*(64 - m) + 32) >> 6, with m constant along a row.
// Scratch: x5-x8, v0-v7, v16-v31.
function blend_h_8bpc_neon, export=1
        movrel          x6, blend_h_tbl
        movrel          x5, X(obmc_masks)
        add             x5, x5, w4, uxtw        // mask = obmc_masks + h
        sub             w4, w4, w4, lsr #2      // rows to process = h - h/4
        clz             w7, w3
        movi            v4.16b, #64
        add             x8, x0, x1
        lsl             x1, x1, #1
        sub             w7, w7, #24
        ldrsw           x7, [x6, x7, lsl #2]
        add             x6, x6, x7
        br              x6
20:
        AARCH64_VALID_JUMP_TARGET
2:
        // w == 2: zip1 turns the two row masks m0,m1 into m0,m0,m1,m1.
        ldr             h0, [x5], #2
        ldr             s1, [x2], #4
        subs            w4, w4, #2
        ldr             h2, [x0]
        zip1            v0.8b, v0.8b, v0.8b
        sub             v3.8b, v4.8b, v0.8b
        ld1             {v2.h}[1], [x8]
        umull           v5.8h, v1.8b, v0.8b
        umlal           v5.8h, v2.8b, v3.8b
        rshrn           v5.8b, v5.8h, #6
        st1             {v5.h}[0], [x0], x1
        st1             {v5.h}[1], [x8], x1
        b.gt            2b
        ret
40:
        AARCH64_VALID_JUMP_TARGET
4:
        // w == 4: ld2r broadcasts m0 into v0 and m1 into v1; ext then packs
        // four copies of m0 followed by four copies of m1.
        ld2r            {v0.8b, v1.8b}, [x5], #2
        ld1             {v2.8b}, [x2], #8
        subs            w4, w4, #2
        ext             v0.8b, v0.8b, v1.8b, #4
        ldr             s3, [x0]
        sub             v5.8b, v4.8b, v0.8b
        ld1             {v3.s}[1], [x8]
        umull           v6.8h, v2.8b, v0.8b
        umlal           v6.8h, v3.8b, v5.8b
        rshrn           v6.8b, v6.8h, #6
        st1             {v6.s}[0], [x0], x1
        st1             {v6.s}[1], [x8], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        // w == 8: low half carries row 0 (m0), high half carries row 1 (m1).
        ld2r            {v0.16b, v1.16b}, [x5], #2
        ld1             {v2.16b}, [x2], #16
        ldr             d3, [x0]
        ext             v0.16b, v0.16b, v1.16b, #8
        sub             v5.16b, v4.16b, v0.16b
        ld1             {v3.d}[1], [x8]
        subs            w4, w4, #2
        umull           v6.8h, v0.8b, v2.8b
        umlal           v6.8h, v3.8b, v5.8b
        umull2          v7.8h, v0.16b, v2.16b
        umlal2          v7.8h, v3.16b, v5.16b
        rshrn           v16.8b, v6.8h, #6
        rshrn           v17.8b, v7.8h, #6
        st1             {v16.8b}, [x0], x1
        st1             {v17.8b}, [x8], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
16:
        // w == 16: v0 = m0 for row 0, v1 = m1 for row 1.
        ld2r            {v0.16b, v1.16b}, [x5], #2
        ld1             {v2.16b, v3.16b}, [x2], #32
        ld1             {v5.16b}, [x0]
        sub             v7.16b, v4.16b, v0.16b
        sub             v16.16b, v4.16b, v1.16b
        ld1             {v6.16b}, [x8]
        subs            w4, w4, #2
        umull           v17.8h, v0.8b, v2.8b
        umlal           v17.8h, v5.8b, v7.8b
        umull2          v18.8h, v0.16b, v2.16b
        umlal2          v18.8h, v5.16b, v7.16b
        umull           v19.8h, v1.8b, v3.8b
        umlal           v19.8h, v6.8b, v16.8b
        umull2          v20.8h, v1.16b, v3.16b
        umlal2          v20.8h, v6.16b, v16.16b
        rshrn           v21.8b, v17.8h, #6
        rshrn2          v21.16b, v18.8h, #6
        rshrn           v22.8b, v19.8h, #6
        rshrn2          v22.16b, v20.8h, #6
        st1             {v21.16b}, [x0], x1
        st1             {v22.16b}, [x8], x1
        b.gt            16b
        ret
1280:
640:
320:
        AARCH64_VALID_JUMP_TARGET
        // w >= 32: shared path. The inner loop walks a row pair in 32-pixel
        // steps, so the row advance becomes 2*stride - w.
        sub             x1, x1, w3, uxtw
        add             x7, x2, w3, uxtw        // x7 = tmp for the second row
321:
        ld2r            {v0.16b, v1.16b}, [x5], #2
        mov             w6, w3                  // w6 = pixels left in this row pair
        sub             v20.16b, v4.16b, v0.16b
        sub             v21.16b, v4.16b, v1.16b
32:
        ld1             {v16.16b, v17.16b}, [x2], #32
        ld1             {v2.16b, v3.16b}, [x0]
        subs            w6, w6, #32
        umull           v23.8h, v0.8b, v16.8b
        umlal           v23.8h, v2.8b, v20.8b
        ld1             {v18.16b, v19.16b}, [x7], #32
        umull2          v27.8h, v0.16b, v16.16b
        umlal2          v27.8h, v2.16b, v20.16b
        ld1             {v6.16b, v7.16b}, [x8]
        umull           v24.8h, v0.8b, v17.8b
        umlal           v24.8h, v3.8b, v20.8b
        umull2          v28.8h, v0.16b, v17.16b
        umlal2          v28.8h, v3.16b, v20.16b
        umull           v25.8h, v1.8b, v18.8b
        umlal           v25.8h, v6.8b, v21.8b
        umull2          v5.8h, v1.16b, v18.16b
        umlal2          v5.8h, v6.16b, v21.16b
        rshrn           v29.8b, v23.8h, #6
        rshrn2          v29.16b, v27.8h, #6
        umull           v26.8h, v1.8b, v19.8b
        umlal           v26.8h, v7.8b, v21.8b
        umull2          v31.8h, v1.16b, v19.16b
        umlal2          v31.8h, v7.16b, v21.16b
        rshrn           v30.8b, v24.8h, #6
        rshrn2          v30.16b, v28.8h, #6
        rshrn           v23.8b, v25.8h, #6
        rshrn2          v23.16b, v5.8h, #6
        rshrn           v24.8b, v26.8h, #6
        st1             {v29.16b, v30.16b}, [x0], #32
        rshrn2          v24.16b, v31.8h, #6
        st1             {v23.16b, v24.16b}, [x8], #32
        b.gt            32b
        subs            w4, w4, #2
        add             x0, x0, x1
        add             x8, x8, x1
        add             x2, x2, w3, uxtw        // skip the row x7 just consumed
        add             x7, x7, w3, uxtw        // skip the row x2 consumes next
        b.gt            321b
        ret
endfunc

// Offsets of the per-width entry points, indexed by clz(w)-24.
jumptable blend_h_tbl
        .word           1280b - blend_h_tbl
        .word           640b - blend_h_tbl
        .word           320b - blend_h_tbl
        .word           160b - blend_h_tbl
        .word           80b - blend_h_tbl
        .word           40b - blend_h_tbl
        .word           20b - blend_h_tbl
endjumptable

// blend_v_8bpc_neon: blend with one mask value per column, taken from
// obmc_masks.
//
// Register roles, as used by the code below:
//   x0  dst pointer (read and written in place)
//   x1  dst stride in bytes (doubled on entry; x8 = dst + stride)
//   x2  tmp pixels, read contiguously (w bytes per row)
//   w3  width; the mask pointer is obmc_masks + w and clz(w3)-26 indexes
//       blend_v_tbl, whose entries are ordered 32, 16, 8, 4, 2
//   w4  height; every loop handles two rows and runs while w4 > 0
//
// The mask is loaded once before each loop. Only the leading 3/4 of every
// row is stored (1, 3, 6, 12 or 24 bytes for w = 2, 4, 8, 16, 32), which is
// why the wider paths split each store and pre-subtract the first part's
// size from the row advance in x1. tmp still advances by a full w per row.
// Scratch: x5, x6, x8, v0-v7, v16-v31.
function blend_v_8bpc_neon, export=1
        movrel          x6, blend_v_tbl
        movrel          x5, X(obmc_masks)
        add             x5, x5, w3, uxtw        // mask = obmc_masks + w
        clz             w3, w3
        movi            v4.16b, #64
        add             x8, x0, x1
        lsl             x1, x1, #1
        sub             w3, w3, #26
        ldrsw           x3, [x6, x3, lsl #2]
        add             x6, x6, x3
        br              x6
20:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v0.8b}, [x5]
        sub             v1.8b, v4.8b, v0.8b
2:
        // w == 2: only column 0 of each row is written. Lane 0 is row 0 and
        // lane 1 is row 1.
        ldr             h2, [x2], #2
        ldr             b3, [x0]
        subs            w4, w4, #2
        ld1             {v2.b}[1], [x2]
        ld1             {v3.b}[1], [x8]
        umull           v5.8h, v2.8b, v0.8b
        umlal           v5.8h, v3.8b, v1.8b
        rshrn           v5.8b, v5.8h, #6
        add             x2, x2, #2
        st1             {v5.b}[0], [x0], x1
        st1             {v5.b}[1], [x8], x1
        b.gt            2b
        ret
40:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v0.2s}, [x5]           // 4 mask bytes in both halves
        sub             x1, x1, #2
        sub             v1.8b, v4.8b, v0.8b
4:
        // w == 4: 3 bytes stored per row (halfword + byte).
        ld1             {v2.8b}, [x2], #8
        ldr             s3, [x0]
        ld1             {v3.s}[1], [x8]
        subs            w4, w4, #2
        umull           v5.8h, v2.8b, v0.8b
        umlal           v5.8h, v3.8b, v1.8b
        rshrn           v5.8b, v5.8h, #6
        str             h5, [x0], #2
        st1             {v5.h}[2], [x8], #2
        st1             {v5.b}[2], [x0], x1
        st1             {v5.b}[6], [x8], x1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
        ld1r            {v0.2d}, [x5]           // 8 mask bytes in both halves
        sub             x1, x1, #4
        sub             v1.16b, v4.16b, v0.16b
        zip2            v16.2d, v1.2d, v1.2d
8:
        // w == 8: 6 bytes stored per row (word + halfword). v4 is reused for
        // the second dst row; the 64 constant is no longer needed here.
        ld1             {v2.16b}, [x2], #16
        ldr             d3, [x0]
        ldr             d4, [x8]
        subs            w4, w4, #2
        umull           v5.8h, v0.8b, v2.8b
        umlal           v5.8h, v3.8b, v1.8b
        umull2          v6.8h, v0.16b, v2.16b
        umlal           v6.8h, v4.8b, v16.8b
        rshrn           v7.8b, v5.8h, #6
        rshrn           v17.8b, v6.8h, #6
        str             s7, [x0], #4
        str             s17, [x8], #4
        st1             {v7.h}[2], [x0], x1
        st1             {v17.h}[2], [x8], x1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b}, [x5]
        sub             x1, x1, #8
        sub             v2.16b, v4.16b, v0.16b
16:
        // w == 16: 12 bytes stored per row (8 bytes + word).
        ld1             {v5.16b, v6.16b}, [x2], #32
        ld1             {v7.16b}, [x0]
        subs            w4, w4, #2
        ld1             {v16.16b}, [x8]
        umull           v17.8h, v5.8b, v0.8b
        umlal           v17.8h, v7.8b, v2.8b
        umull2          v18.8h, v5.16b, v0.16b
        umlal2          v18.8h, v7.16b, v2.16b
        umull           v20.8h, v6.8b, v0.8b
        umlal           v20.8h, v16.8b, v2.8b
        umull2          v21.8h, v6.16b, v0.16b
        umlal2          v21.8h, v16.16b, v2.16b
        rshrn           v19.8b, v17.8h, #6
        rshrn2          v19.16b, v18.8h, #6
        rshrn           v22.8b, v20.8h, #6
        rshrn2          v22.16b, v21.8h, #6
        st1             {v19.8b}, [x0], #8
        st1             {v22.8b}, [x8], #8
        st1             {v19.s}[2], [x0], x1
        st1             {v22.s}[2], [x8], x1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
        ld1             {v0.16b, v1.16b}, [x5]
        sub             x1, x1, #16
        sub             v2.16b, v4.16b, v0.16b
        sub             v3.8b, v4.8b, v1.8b     // only columns 16-23 are needed
32:
        // w == 32: 24 bytes stored per row (16 bytes + 8 bytes).
        ld1             {v16.16b, v17.16b, v18.16b, v19.16b}, [x2], #64
        ld1             {v5.16b, v6.16b}, [x0]
        subs            w4, w4, #2
        ld1             {v20.16b, v21.16b}, [x8]
        umull           v22.8h, v16.8b, v0.8b
        umlal           v22.8h, v5.8b, v2.8b
        umull2          v23.8h, v16.16b, v0.16b
        umlal2          v23.8h, v5.16b, v2.16b
        umull           v28.8h, v17.8b, v1.8b
        umlal           v28.8h, v6.8b, v3.8b
        umull           v30.8h, v18.8b, v0.8b
        umlal           v30.8h, v20.8b, v2.8b
        umull2          v31.8h, v18.16b, v0.16b
        umlal2          v31.8h, v20.16b, v2.16b
        umull           v25.8h, v19.8b, v1.8b
        umlal           v25.8h, v21.8b, v3.8b
        rshrn           v24.8b, v22.8h, #6
        rshrn2          v24.16b, v23.8h, #6
        rshrn           v28.8b, v28.8h, #6
        rshrn           v30.8b, v30.8h, #6
        rshrn2          v30.16b, v31.8h, #6
        rshrn           v27.8b, v25.8h, #6
        st1             {v24.16b}, [x0], #16
        st1             {v30.16b}, [x8], #16
        st1             {v28.8b}, [x0], x1
        st1             {v27.8b}, [x8], x1
        b.gt            32b
        ret
endfunc

// Offsets of the per-width entry points, indexed by clz(w)-26.
jumptable blend_v_tbl
        .word           320b - blend_v_tbl
        .word           160b - blend_v_tbl
        .word           80b - blend_v_tbl
        .word           40b - blend_v_tbl
        .word           20b - blend_v_tbl
endjumptable


// This has got the same signature as the put_8tap functions,
// and assumes that x8 is set to (clz(w)-24).
//
// Plain block copy with no filtering. Register roles, as used by the code
// below:
//   x0  dst pointer          x1  dst stride in bytes
//   x2  src pointer          x3  src stride in bytes
//   w5  row count; widths 2-32 copy two rows per iteration, widths 64 and
//       128 copy one row per iteration, and every loop runs while w5 > 0
//   x8  index into put_tbl (entries ordered 128, 64, 32, 16, 8, 4, 2)
// Scratch: x8-x10, v0-v7.
// NOTE(review): the matching endfunc and put_tbl sit on the following source
// line, which was left untouched because it also holds the start of
// prep_neon.
function put_neon, export=1
        movrel          x9, put_tbl
        ldrsw           x8, [x9, x8, lsl #2]
        add             x9, x9, x8
        br              x9

20:
        AARCH64_VALID_JUMP_TARGET
2:
        // w == 2: one halfword per row.
        ldrh            w9, [x2]
        ldrh            w10, [x2, x3]
        add             x2, x2, x3, lsl #1
        subs            w5, w5, #2
        strh            w9, [x0]
        strh            w10, [x0, x1]
        add             x0, x0, x1, lsl #1
        b.gt            2b
        ret
40:
        AARCH64_VALID_JUMP_TARGET
4:
        // w == 4: one word per row.
        ldr             w9, [x2]
        ldr             w10, [x2, x3]
        add             x2, x2, x3, lsl #1
        subs            w5, w5, #2
        str             w9, [x0]
        str             w10, [x0, x1]
        add             x0, x0, x1, lsl #1
        b.gt            4b
        ret
80:
        AARCH64_VALID_JUMP_TARGET
8:
        // w == 8: one doubleword per row.
        ldr             x9, [x2]
        ldr             x10, [x2, x3]
        add             x2, x2, x3, lsl #1
        subs            w5, w5, #2
        str             x9, [x0]
        str             x10, [x0, x1]
        add             x0, x0, x1, lsl #1
        b.gt            8b
        ret
160:
        AARCH64_VALID_JUMP_TARGET
16:
        // w == 16: one q register per row.
        ldr             q0, [x2]
        ldr             q1, [x2, x3]
        add             x2, x2, x3, lsl #1
        subs            w5, w5, #2
        str             q0, [x0]
        str             q1, [x0, x1]
        add             x0, x0, x1, lsl #1
        b.gt            16b
        ret
320:
        AARCH64_VALID_JUMP_TARGET
32:
        // w == 32: one q-register pair per row, two rows per iteration.
        ldp             q0, q1, [x2]
        add             x2, x2, x3
        stp             q0, q1, [x0]
        add             x0, x0, x1
        ldp             q2, q3, [x2]
        add             x2, x2, x3
        stp             q2, q3, [x0]
        subs            w5, w5, #2
        add             x0, x0, x1
        b.gt            32b
        ret
640:
        AARCH64_VALID_JUMP_TARGET
64:
        // w == 64: a single row per iteration.
        ldp             q0, q1, [x2]
        stp             q0, q1, [x0]
        ldp             q2, q3, [x2, #32]
        add             x2, x2, x3
        stp             q2, q3, [x0, #32]
        subs            w5, w5, #1
        add             x0, x0, x1
        b.gt            64b
        ret
1280:
        AARCH64_VALID_JUMP_TARGET
128:
        // w == 128: a single row per iteration.
        ldp             q0, q1, [x2]
        stp             q0, q1, [x0]
        ldp             q2, q3, [x2, #32]
        stp             q2, q3, [x0, #32]
        ldp             q4, q5, [x2, #64]
        stp             q4, q5, [x0, #64]
        ldp             q6, q7, [x2, #96]
        add             x2, x2, x3
        stp             q6, q7, [x0, #96]
        subs            w5, w5, #1
        add             x0, x0, x1
        b.gt            128b
        ret
endfunc

// Dispatch table for the function that ends just above. Each entry is the
// 32-bit offset from put_tbl to one of that function's numeric labels,
// ordered from the widest block (1280) down to the narrowest (20).
// NOTE(review): presumably indexed by clz(w)-24 like prep_tbl below; the
// code that reads this table is outside this part of the file.
jumptable put_tbl
        .word 1280b - put_tbl
        .word 640b  - put_tbl
        .word 320b  - put_tbl
        .word 160b  - put_tbl
        .word 80b   - put_tbl
        .word 40b   - put_tbl
        .word 20b   - put_tbl
endjumptable


// This has got the same signature as the prep_8tap functions,
// and assumes that x8 is set to (clz(w)-24), and x7 to w*2.
//
// Unfiltered "prep": widens 8 bit source pixels to 16 bit and scales them
// by 16 (pixel << 4), writing the result contiguously to the output.
//
// Registers as used by the body below:
//   x0  output pointer; advanced past everything stored
//   x1  source pointer
//   x2  source stride in bytes
//   w4  remaining row count; every loop runs until it is <= 0, so it is
//       assumed to be a multiple of the rows handled per iteration
//       (4 rows for w=4/8/16, 2 rows for w=32, 1 row for w=64/128)
//   x8  index into prep_tbl on entry, overwritten with the table entry
//   x9  scratch (table base, then branch target)
//   v24 constant 16 in every byte lane
// x7 is not referenced by this body.
//
// The scaling is done with a mix of "ushll #4" and "umull by v24"; both
// yield pixel*16 as a 16 bit value.
// NOTE(review): the mix presumably balances execution-unit usage; confirm
// before simplifying.
function prep_neon, export=1
        movrel          x9,  prep_tbl
        ldrsw           x8,  [x9, x8, lsl #2]   // signed offset of the width-specific loop
        movi            v24.16b, #16            // multiplier for the umull based widening
        add             x9,  x9,  x8
        br              x9

40:     // w == 4, four rows per iteration
        AARCH64_VALID_JUMP_TARGET
4:
        ldr             s0,  [x1]
        ldr             s2,  [x1, x2]
        add             x1,  x1,  x2,  lsl #1
        ldr             s1,  [x1]
        ldr             s3,  [x1, x2]
        add             x1,  x1,  x2,  lsl #1
        mov             v0.s[1], v2.s[0]        // v0 = row 0 | row 1
        mov             v1.s[1], v3.s[0]        // v1 = row 2 | row 3
        ushll           v0.8h, v0.8b, #4
        ushll           v1.8h, v1.8b, #4
        subs            w4,  w4,  #4
        stp             q0,  q1,  [x0], #32
        b.gt            4b
        ret
80:     // w == 8, four rows per iteration
        AARCH64_VALID_JUMP_TARGET
8:
        ldr             d0,  [x1]
        ldr             d1,  [x1, x2]
        add             x1,  x1,  x2,  lsl #1
        ldr             d2,  [x1]
        ldr             d3,  [x1, x2]
        add             x1,  x1,  x2,  lsl #1
        ushll           v0.8h, v0.8b, #4
        ushll           v1.8h, v1.8b, #4
        umull           v2.8h, v2.8b, v24.8b
        umull           v3.8h, v3.8b, v24.8b
        subs            w4,  w4,  #4
        stp             q0,  q1,  [x0]
        stp             q2,  q3,  [x0, #32]
        add             x0,  x0,  #64
        b.gt            8b
        ret
160:    // w == 16, four rows per iteration
        AARCH64_VALID_JUMP_TARGET
16:
        ldr             q1,  [x1]
        ldr             q3,  [x1, x2]
        add             x1,  x1,  x2,  lsl #1
        ushll           v0.8h, v1.8b,  #4
        ushll2          v1.8h, v1.16b, #4
        ldr             q5,  [x1]
        ldr             q7,  [x1, x2]
        add             x1,  x1,  x2,  lsl #1
        umull           v2.8h, v3.8b,  v24.8b
        umull2          v3.8h, v3.16b, v24.16b
        ushll           v4.8h, v5.8b,  #4
        ushll2          v5.8h, v5.16b, #4
        umull           v6.8h, v7.8b,  v24.8b
        umull2          v7.8h, v7.16b, v24.16b
        subs            w4,  w4,  #4
        stp             q0,  q1,  [x0]
        stp             q2,  q3,  [x0, #32]
        stp             q4,  q5,  [x0, #64]
        stp             q6,  q7,  [x0, #96]
        add             x0,  x0,  #128
        b.gt            16b
        ret
320:    // w == 32, two rows per iteration
        AARCH64_VALID_JUMP_TARGET
32:
        ldp             q4,  q5,  [x1]
        add             x1,  x1,  x2
        ldp             q6,  q7,  [x1]
        add             x1,  x1,  x2
        ushll           v0.8h, v4.8b,  #4
        ushll2          v1.8h, v4.16b, #4
        umull           v2.8h, v5.8b,  v24.8b
        umull2          v3.8h, v5.16b, v24.16b
        ushll           v4.8h, v6.8b,  #4
        ushll2          v5.8h, v6.16b, #4
        umull           v6.8h, v7.8b,  v24.8b
        umull2          v7.8h, v7.16b, v24.16b
        subs            w4,  w4,  #2
        stp             q0,  q1,  [x0]
        stp             q2,  q3,  [x0, #32]
        stp             q4,  q5,  [x0, #64]
        stp             q6,  q7,  [x0, #96]
        add             x0,  x0,  #128
        b.gt            32b
        ret
640:    // w == 64, one row per iteration
        AARCH64_VALID_JUMP_TARGET
64:
        ldp             q4,  q5,  [x1]
        ldp             q6,  q7,  [x1, #32]
        add             x1,  x1,  x2
        ushll           v0.8h, v4.8b,  #4
        ushll2          v1.8h, v4.16b, #4
        umull           v2.8h, v5.8b,  v24.8b
        umull2          v3.8h, v5.16b, v24.16b
        ushll           v4.8h, v6.8b,  #4
        ushll2          v5.8h, v6.16b, #4
        umull           v6.8h, v7.8b,  v24.8b
        umull2          v7.8h, v7.16b, v24.16b
        subs            w4,  w4,  #1
        stp             q0,  q1,  [x0]
        stp             q2,  q3,  [x0, #32]
        stp             q4,  q5,  [x0, #64]
        stp             q6,  q7,  [x0, #96]
        add             x0,  x0,  #128
        b.gt            64b
        ret
1280:   // w == 128, one row per iteration, handled as two 64 pixel halves
        AARCH64_VALID_JUMP_TARGET
128:
        ldp             q28, q29, [x1]
        ldp             q30, q31, [x1, #32]
        ushll           v16.8h, v28.8b,  #4
        ushll2          v17.8h, v28.16b, #4
        umull           v18.8h, v29.8b,  v24.8b
        umull2          v19.8h, v29.16b, v24.16b
        ushll           v20.8h, v30.8b,  #4
        ushll2          v21.8h, v30.16b, #4
        umull           v22.8h, v31.8b,  v24.8b
        umull2          v23.8h, v31.16b, v24.16b
        ldp             q28, q29, [x1, #64]     // second half of the row
        ldp             q30, q31, [x1, #96]
        add             x1,  x1,  x2
        stp             q16, q17, [x0]
        stp             q18, q19, [x0, #32]
        stp             q20, q21, [x0, #64]
        stp             q22, q23, [x0, #96]
        ushll           v16.8h, v28.8b,  #4
        ushll2          v17.8h, v28.16b, #4
        umull           v18.8h, v29.8b,  v24.8b
        umull2          v19.8h, v29.16b, v24.16b
        ushll           v20.8h, v30.8b,  #4
        ushll2          v21.8h, v30.16b, #4
        umull           v22.8h, v31.8b,  v24.8b
        umull2          v23.8h, v31.16b, v24.16b
        subs            w4,  w4,  #1
        stp             q16, q17, [x0, #128]
        stp             q18, q19, [x0, #160]
        stp             q20, q21, [x0, #192]
        stp             q22, q23, [x0, #224]
        add             x0,  x0,  #256
        b.gt            128b
        ret
endfunc

// Dispatch table for prep_neon: 32-bit offsets from prep_tbl to the
// width-specific entry labels, indexed by x8 on entry to prep_neon
// (entry 0 is the 1280 label, entry 5 the 40 label).
jumptable prep_tbl
        .word 1280b - prep_tbl
        .word 640b  - prep_tbl
        .word 320b  - prep_tbl
        .word 160b  - prep_tbl
        .word 80b   - prep_tbl
        .word 40b   - prep_tbl
endjumptable


// Load lane 0 of up to seven registers, alternating between the pointers
// \s0 and \s1 and post-incrementing the pointer used by \strd each time.
// \wd is the lane suffix (.h or .s). \d0 and \d1 are mandatory; \d2 and
// \d3 are loaded as a pair when \d2 is given; \d4, \d5 and \d6 are each
// optional.
.macro load_slice s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
        ld1             {\d0\wd}[0], [\s0], \strd
        ld1             {\d1\wd}[0], [\s1], \strd
.ifnb \d2
        ld1             {\d2\wd}[0], [\s0], \strd
        ld1             {\d3\wd}[0], [\s1], \strd
.endif
.ifnb \d4
        ld1             {\d4\wd}[0], [\s0], \strd
.endif
.ifnb \d5
        ld1             {\d5\wd}[0], [\s1], \strd
.endif
.ifnb \d6
        ld1             {\d6\wd}[0], [\s0], \strd
.endif
.endm
// Same register/pointer pattern as load_slice, but loading whole vectors
// of arrangement \wd (.8b or .16b) instead of a single lane.
.macro load_reg s0, s1, strd, wd, d0, d1, d2, d3, d4, d5, d6
        ld1             {\d0\wd}, [\s0], \strd
        ld1             {\d1\wd}, [\s1], \strd
.ifnb \d2
        ld1             {\d2\wd}, [\s0], \strd
        ld1             {\d3\wd}, [\s1], \strd
.endif
.ifnb \d4
        ld1             {\d4\wd}, [\s0], \strd
.endif
.ifnb \d5
        ld1             {\d5\wd}, [\s1], \strd
.endif
.ifnb \d6
        ld1             {\d6\wd}, [\s0], \strd
.endif
.endm
// Width-specific front ends for load_slice (lane loads) and load_reg
// (full vector loads).
.macro load_h s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, .h, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
.macro load_s s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_slice      \s0, \s1, \strd, .s, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
.macro load_8b s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_reg        \s0, \s1, \strd, .8b, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
.macro load_16b s0, s1, strd, d0, d1, d2, d3, d4, d5, d6
        load_reg        \s0, \s1, \strd, .16b, \d0, \d1, \d2, \d3, \d4, \d5, \d6
.endm
// Combine each register with its successor using trn1, in place:
// \r0 = trn1(\r0, \r1), \r1 = trn1(\r1, \r2), and, when \r3 is given,
// \r2 = trn1(\r2, \r3), \r3 = trn1(\r3, \r4). \wd is the arrangement.
.macro interleave_1 wd, r0, r1, r2, r3, r4
        trn1            \r0\wd, \r0\wd, \r1\wd
        trn1            \r1\wd, \r1\wd, \r2\wd
.ifnb \r3
        trn1            \r2\wd, \r2\wd, \r3\wd
        trn1            \r3\wd, \r3\wd, \r4\wd
.endif
.endm
.macro interleave_1_h r0, r1, r2, r3, r4
        interleave_1    .4h, \r0, \r1, \r2, \r3, \r4
.endm
.macro interleave_1_s r0, r1, r2, r3, r4
        interleave_1    .2s, \r0, \r1, \r2, \r3, \r4
.endm
// Like interleave_1, but each register is combined with the one two
// positions further on: \rN = trn1(\rN, \rN+2) for N = 0..3.
.macro interleave_2 wd, r0, r1, r2, r3, r4, r5
        trn1            \r0\wd, \r0\wd, \r2\wd
        trn1            \r1\wd, \r1\wd, \r3\wd
        trn1            \r2\wd, \r2\wd, \r4\wd
        trn1            \r3\wd, \r3\wd, \r5\wd
.endm
.macro interleave_2_s r0, r1, r2, r3, r4, r5
        interleave_2    .2s, \r0, \r1, \r2, \r3, \r4, \r5
.endm
// Zero-extend the low 8 bytes of each register to 8 halfwords, in place.
// \r0 and \r1 are mandatory, \r2/\r3 come as a pair, \r4..\r6 are each
// optional.
.macro uxtl_b r0, r1, r2, r3, r4, r5, r6
        uxtl            \r0\().8h, \r0\().8b
        uxtl            \r1\().8h, \r1\().8b
.ifnb \r2
        uxtl            \r2\().8h, \r2\().8b
        uxtl            \r3\().8h, \r3\().8b
.endif
.ifnb \r4
        uxtl            \r4\().8h, \r4\().8b
.endif
.ifnb \r5
        uxtl            \r5\().8h, \r5\().8b
.endif
.ifnb \r6
        uxtl            \r6\().8h, \r6\().8b
.endif
.endm
// \d = \s0*v0.h[0] + \s1*v0.h[1] + \s2*v0.h[2] + \s3*v0.h[3], with \wd as
// the arrangement suffix applied to \d and all sources.
.macro mul_mla_4tap d, s0, s1, s2, s3, wd
        mul             \d\wd,  \s0\wd, v0.h[0]
        mla             \d\wd,  \s1\wd, v0.h[1]
        mla             \d\wd,  \s2\wd, v0.h[2]
        mla             \d\wd,  \s3\wd, v0.h[3]
.endm
// Interleaving the mul/mla chains actually hurts performance
// significantly on Cortex A53, thus keeping mul/mla tightly
// chained like this.
//
// The 6 tap variants below take the same argument lists as the 8 tap
// variants further down but only use coefficients v0.h[1]..v0.h[6]; the
// first source and the last source of each 8 tap window are accepted and
// ignored.
.macro mul_mla_6tap_0_4h d0, s0, s1, s2, s3, s4, s5, s6, s7
        mul             \d0\().4h, \s1\().4h, v0.h[1]
        mla             \d0\().4h, \s2\().4h, v0.h[2]
        mla             \d0\().4h, \s3\().4h, v0.h[3]
        mla             \d0\().4h, \s4\().4h, v0.h[4]
        mla             \d0\().4h, \s5\().4h, v0.h[5]
        mla             \d0\().4h, \s6\().4h, v0.h[6]
.endm
.macro mul_mla_6tap_0 d0, s0, s1, s2, s3, s4, s5, s6, s7
        mul             \d0\().8h, \s1\().8h, v0.h[1]
        mla             \d0\().8h, \s2\().8h, v0.h[2]
        mla             \d0\().8h, \s3\().8h, v0.h[3]
        mla             \d0\().8h, \s4\().8h, v0.h[4]
        mla             \d0\().8h, \s5\().8h, v0.h[5]
        mla             \d0\().8h, \s6\().8h, v0.h[6]
.endm
// Two outputs; the window for \d1 starts one source later than for \d0.
.macro mul_mla_6tap_1 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8
        mul             \d0\().8h, \s1\().8h, v0.h[1]
        mla             \d0\().8h, \s2\().8h, v0.h[2]
        mla             \d0\().8h, \s3\().8h, v0.h[3]
        mla             \d0\().8h, \s4\().8h, v0.h[4]
        mla             \d0\().8h, \s5\().8h, v0.h[5]
        mla             \d0\().8h, \s6\().8h, v0.h[6]
        mul             \d1\().8h, \s2\().8h, v0.h[1]
        mla             \d1\().8h, \s3\().8h, v0.h[2]
        mla             \d1\().8h, \s4\().8h, v0.h[3]
        mla             \d1\().8h, \s5\().8h, v0.h[4]
        mla             \d1\().8h, \s6\().8h, v0.h[5]
        mla             \d1\().8h, \s7\().8h, v0.h[6]
.endm
// Two outputs; the window for \d1 starts two sources later than for \d0.
.macro mul_mla_6tap_2 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9
        mul             \d0\().8h, \s1\().8h, v0.h[1]
        mla             \d0\().8h, \s2\().8h, v0.h[2]
        mla             \d0\().8h, \s3\().8h, v0.h[3]
        mla             \d0\().8h, \s4\().8h, v0.h[4]
        mla             \d0\().8h, \s5\().8h, v0.h[5]
        mla             \d0\().8h, \s6\().8h, v0.h[6]
        mul             \d1\().8h, \s3\().8h, v0.h[1]
        mla             \d1\().8h, \s4\().8h, v0.h[2]
        mla             \d1\().8h, \s5\().8h, v0.h[3]
        mla             \d1\().8h, \s6\().8h, v0.h[4]
        mla             \d1\().8h, \s7\().8h, v0.h[5]
        mla             \d1\().8h, \s8\().8h, v0.h[6]
.endm
// 8 tap variants: \d0 = sum over i = 0..7 of \s<i> * v0.h[i].
.macro mul_mla_8tap_0_4h d0, s0, s1, s2, s3, s4, s5, s6, s7
        mul             \d0\().4h, \s0\().4h, v0.h[0]
        mla             \d0\().4h, \s1\().4h, v0.h[1]
        mla             \d0\().4h, \s2\().4h, v0.h[2]
        mla             \d0\().4h, \s3\().4h, v0.h[3]
        mla             \d0\().4h, \s4\().4h, v0.h[4]
        mla             \d0\().4h, \s5\().4h, v0.h[5]
        mla             \d0\().4h, \s6\().4h, v0.h[6]
        mla             \d0\().4h, \s7\().4h, v0.h[7]
.endm
.macro mul_mla_8tap_0 d0, s0, s1, s2, s3, s4, s5, s6, s7
        mul             \d0\().8h, \s0\().8h, v0.h[0]
        mla             \d0\().8h, \s1\().8h, v0.h[1]
        mla             \d0\().8h, \s2\().8h, v0.h[2]
        mla             \d0\().8h, \s3\().8h, v0.h[3]
        mla             \d0\().8h, \s4\().8h, v0.h[4]
        mla             \d0\().8h, \s5\().8h, v0.h[5]
        mla             \d0\().8h, \s6\().8h, v0.h[6]
        mla             \d0\().8h, \s7\().8h, v0.h[7]
.endm
// Two outputs; the window for \d1 starts one source later than for \d0.
.macro mul_mla_8tap_1 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8
        mul             \d0\().8h, \s0\().8h, v0.h[0]
        mla             \d0\().8h, \s1\().8h, v0.h[1]
        mla             \d0\().8h, \s2\().8h, v0.h[2]
        mla             \d0\().8h, \s3\().8h, v0.h[3]
        mla             \d0\().8h, \s4\().8h, v0.h[4]
        mla             \d0\().8h, \s5\().8h, v0.h[5]
        mla             \d0\().8h, \s6\().8h, v0.h[6]
        mla             \d0\().8h, \s7\().8h, v0.h[7]
        mul             \d1\().8h, \s1\().8h, v0.h[0]
        mla             \d1\().8h, \s2\().8h, v0.h[1]
        mla             \d1\().8h, \s3\().8h, v0.h[2]
        mla             \d1\().8h, \s4\().8h, v0.h[3]
        mla             \d1\().8h, \s5\().8h, v0.h[4]
        mla             \d1\().8h, \s6\().8h, v0.h[5]
        mla             \d1\().8h, \s7\().8h, v0.h[6]
        mla             \d1\().8h, \s8\().8h, v0.h[7]
.endm
// Two outputs; the window for \d1 starts two sources later than for \d0.
.macro mul_mla_8tap_2 d0, d1, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9
        mul             \d0\().8h, \s0\().8h, v0.h[0]
        mla             \d0\().8h, \s1\().8h, v0.h[1]
        mla             \d0\().8h, \s2\().8h, v0.h[2]
        mla             \d0\().8h, \s3\().8h, v0.h[3]
        mla             \d0\().8h, \s4\().8h, v0.h[4]
        mla             \d0\().8h, \s5\().8h, v0.h[5]
        mla             \d0\().8h, \s6\().8h, v0.h[6]
        mla             \d0\().8h, \s7\().8h, v0.h[7]
        mul             \d1\().8h, \s2\().8h, v0.h[0]
        mla             \d1\().8h, \s3\().8h, v0.h[1]
        mla             \d1\().8h, \s4\().8h, v0.h[2]
        mla             \d1\().8h, \s5\().8h, v0.h[3]
        mla             \d1\().8h, \s6\().8h, v0.h[4]
        mla             \d1\().8h, \s7\().8h, v0.h[5]
        mla             \d1\().8h, \s8\().8h, v0.h[6]
        mla             \d1\().8h, \s9\().8h, v0.h[7]
.endm
// Narrow each register in place from 8 signed halfwords to 8 unsigned
// bytes with a rounding, saturating right shift by \shift. \r1 is
// optional; \r2 and \r3 are handled as a pair when \r2 is given.
.macro sqrshrun_b shift, r0, r1, r2, r3
        sqrshrun        \r0\().8b, \r0\().8h,  #\shift
.ifnb \r1
        sqrshrun        \r1\().8b, \r1\().8h,  #\shift
.endif
.ifnb \r2
        sqrshrun        \r2\().8b, \r2\().8h,  #\shift
        sqrshrun        \r3\().8b, \r3\().8h,  #\shift
.endif
.endm
// Rounding signed right shift by \shift of every halfword, in place, with
// the same optional-argument pattern as sqrshrun_b.
.macro srshr_h shift, r0, r1, r2, r3
        srshr           \r0\().8h, \r0\().8h,  #\shift
.ifnb \r1
        srshr           \r1\().8h, \r1\().8h,  #\shift
.endif
.ifnb \r2
        srshr           \r2\().8h, \r2\().8h,  #\shift
        srshr           \r3\().8h, \r3\().8h,  #\shift
.endif
.endm
// The store helpers below write alternately through the two pointers x0
// and x8, post-incrementing the pointer used by \strd after every store.
//
// st_h: store halfword lanes 0 and 1 of \reg (and lanes 2 and 3 when
// \lanes > 2).
.macro st_h strd, reg, lanes
        st1             {\reg\().h}[0], [x0], \strd
        st1             {\reg\().h}[1], [x8], \strd
.if \lanes > 2
        st1             {\reg\().h}[2], [x0], \strd
        st1             {\reg\().h}[3], [x8], \strd
.endif
.endm
// st_s: store the two low 32 bit lanes of \r0 (and of \r1 when given).
.macro st_s strd, r0, r1
        st1             {\r0\().s}[0], [x0], \strd
        st1             {\r0\().s}[1], [x8], \strd
.ifnb \r1
        st1             {\r1\().s}[0], [x0], \strd
        st1             {\r1\().s}[1], [x8], \strd
.endif
.endm
// st_d: store the low 64 bits of \r0 via x0 and the high 64 bits via x8
// (likewise for \r1 when given).
.macro st_d strd, r0, r1
        st1             {\r0\().8b},   [x0], \strd
        st1             {\r0\().d}[1], [x8], \strd
.ifnb \r1
        st1             {\r1\().8b},   [x0], \strd
        st1             {\r1\().d}[1], [x8], \strd
.endif
.endm
// Final shift and store for 4 pixel wide output. For \type == put the
// values are narrowed to bytes (shift 6) and stored 32 bits per row;
// otherwise they stay 16 bit (shift 2) and are stored 64 bits per row.
.macro shift_store_4 type, strd, r0, r1
.ifc \type, put
        sqrshrun_b      6,     \r0, \r1
        st_s            \strd, \r0, \r1
.else
        srshr_h         2,     \r0, \r1
        st_d            \strd, \r0, \r1
.endif
.endm
// Store whole vectors of arrangement \wd, alternating between x0 and x8.
// \r0/\r1 are mandatory, \r2/\r3 are stored as a pair when \r2 is given,
// and \r4..\r7 are stored as a group when \r4 is given.
.macro st_reg strd, wd, r0, r1, r2, r3, r4, r5, r6, r7
        st1             {\r0\wd}, [x0], \strd
        st1             {\r1\wd}, [x8], \strd
.ifnb \r2
        st1             {\r2\wd}, [x0], \strd
        st1             {\r3\wd}, [x8], \strd
.endif
.ifnb \r4
        st1             {\r4\wd}, [x0], \strd
        st1             {\r5\wd}, [x8], \strd
        st1             {\r6\wd}, [x0], \strd
        st1             {\r7\wd}, [x8], \strd
.endif
.endm
.macro st_8b strd, r0, r1, r2, r3, r4, r5, r6, r7
        st_reg          \strd, .8b,  \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
.endm
.macro st_16b strd, r0, r1, r2, r3, r4, r5, r6, r7
        st_reg          \strd, .16b, \r0, \r1, \r2, \r3, \r4, \r5, \r6, \r7
.endm
// Final shift and store for 8 pixel wide output: bytes (shift 6, 8 bytes
// per row) for \type == put, halfwords (shift 2, 16 bytes per row)
// otherwise.
.macro shift_store_8 type, strd, r0, r1, r2, r3
.ifc \type, put
        sqrshrun_b      6,     \r0, \r1, \r2, \r3
        st_8b           \strd, \r0, \r1, \r2, \r3
.else
        srshr_h         2,     \r0, \r1, \r2, \r3
        st_16b          \strd, \r0, \r1, \r2, \r3
.endif
.endm
// Final shift and store for 16 pixel wide output; \r0/\r1 hold one row
// and \r2/\r3 the other. For \type == put each pair is narrowed into a
// single 16 byte vector (\r0 and \r2). Otherwise each pair is stored with
// a two-register st1, which requires \r1 and \r3 to be the registers
// directly following \r0 and \r2.
.macro shift_store_16 type, strd, r0, r1, r2, r3
.ifc \type, put
        sqrshrun        \r0\().8b,  \r0\().8h, #6
        sqrshrun2       \r0\().16b, \r1\().8h, #6
        sqrshrun        \r2\().8b,  \r2\().8h, #6
        sqrshrun2       \r2\().16b, \r3\().8h, #6
        st_16b          \strd, \r0, \r2
.else
        srshr_h         2,     \r0, \r1, \r2, \r3
        st1             {\r0\().8h, \r1\().8h}, [x0], \strd
        st1             {\r2\().8h, \r3\().8h}, [x8], \strd
.endif
.endm

// Emit the exported entry point \op\()_8tap_\type\()_8bpc_neon. It only
// places the horizontal and vertical filter type constants in x8 and x9
// and tail-branches to the shared implementation \op\()_\taps\()_neon.
.macro make_8tap_fn op, type, type_h, type_v, taps
function \op\()_8tap_\type\()_8bpc_neon, export=1
        mov             x8,  \type_h
        mov             x9,  \type_v
        b               \op\()_\taps\()_neon
endfunc
.endm

// No spaces in these expressions, due to gas-preprocessor.
1434*c0909341SAndroid Build Coastguard Worker#define REGULAR ((0*15<<7)|3*15) 1435*c0909341SAndroid Build Coastguard Worker#define SMOOTH ((1*15<<7)|4*15) 1436*c0909341SAndroid Build Coastguard Worker#define SHARP ((2*15<<7)|3*15) 1437*c0909341SAndroid Build Coastguard Worker 1438*c0909341SAndroid Build Coastguard Worker.macro filter_fn type, dst, d_strd, src, s_strd, w, h, mx, xmx, my, xmy, ds2, sr2, shift_hv, taps 1439*c0909341SAndroid Build Coastguard Workerfunction \type\()_\taps\()_neon 1440*c0909341SAndroid Build Coastguard Worker mov w10, #0x4081 // (1 << 14) | (1 << 7) | (1 << 0) 1441*c0909341SAndroid Build Coastguard Worker mul \mx, \mx, w10 1442*c0909341SAndroid Build Coastguard Worker mul \my, \my, w10 1443*c0909341SAndroid Build Coastguard Worker add \mx, \mx, w8 // mx, 8tap_h, 4tap_h 1444*c0909341SAndroid Build Coastguard Worker add \my, \my, w9 // my, 8tap_v, 4tap_v 1445*c0909341SAndroid Build Coastguard Worker.ifc \type, prep 1446*c0909341SAndroid Build Coastguard Worker uxtw \d_strd, \w 1447*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1448*c0909341SAndroid Build Coastguard Worker.endif 1449*c0909341SAndroid Build Coastguard Worker 1450*c0909341SAndroid Build Coastguard Worker clz w8, \w 1451*c0909341SAndroid Build Coastguard Worker tst \mx, #(0x7f << 14) 1452*c0909341SAndroid Build Coastguard Worker sub w8, w8, #24 1453*c0909341SAndroid Build Coastguard Worker movrel x10, X(mc_subpel_filters), -8 1454*c0909341SAndroid Build Coastguard Worker b.ne L(\type\()_\taps\()_h) 1455*c0909341SAndroid Build Coastguard Worker tst \my, #(0x7f << 14) 1456*c0909341SAndroid Build Coastguard Worker b.ne L(\type\()_\taps\()_v) 1457*c0909341SAndroid Build Coastguard Worker b \type\()_neon 1458*c0909341SAndroid Build Coastguard Worker 1459*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_h): 1460*c0909341SAndroid Build Coastguard Worker cmp \w, #4 1461*c0909341SAndroid Build Coastguard Worker ubfx w9, \mx, #7, #7 1462*c0909341SAndroid 
Build Coastguard Worker and \mx, \mx, #0x7f 1463*c0909341SAndroid Build Coastguard Worker b.le 4f 1464*c0909341SAndroid Build Coastguard Worker mov \mx, w9 1465*c0909341SAndroid Build Coastguard Worker4: 1466*c0909341SAndroid Build Coastguard Worker tst \my, #(0x7f << 14) 1467*c0909341SAndroid Build Coastguard Worker add \xmx, x10, \mx, uxtw #3 1468*c0909341SAndroid Build Coastguard Worker b.ne L(\type\()_\taps\()_hv) 1469*c0909341SAndroid Build Coastguard Worker 1470*c0909341SAndroid Build Coastguard Worker movrel x9, \type\()_\taps\()_h_tbl 1471*c0909341SAndroid Build Coastguard Worker ldrsw x8, [x9, x8, lsl #2] 1472*c0909341SAndroid Build Coastguard Worker add x9, x9, x8 1473*c0909341SAndroid Build Coastguard Worker br x9 1474*c0909341SAndroid Build Coastguard Worker 1475*c0909341SAndroid Build Coastguard Worker20: // 2xN h 1476*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1477*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1478*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmx, #2] 1479*c0909341SAndroid Build Coastguard Worker sub \src, \src, #1 1480*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1481*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1482*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1483*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1484*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1485*c0909341SAndroid Build Coastguard Worker2: 1486*c0909341SAndroid Build Coastguard Worker ld1 {v4.8b}, [\src], \s_strd 1487*c0909341SAndroid Build Coastguard Worker ld1 {v6.8b}, [\sr2], \s_strd 1488*c0909341SAndroid Build Coastguard Worker uxtl v4.8h, v4.8b 1489*c0909341SAndroid Build Coastguard Worker uxtl v6.8h, v6.8b 1490*c0909341SAndroid Build Coastguard Worker ext v5.16b, v4.16b, v4.16b, #2 1491*c0909341SAndroid Build Coastguard Worker ext v7.16b, v6.16b, v6.16b, #2 1492*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 
1493*c0909341SAndroid Build Coastguard Worker trn1 v3.2s, v4.2s, v6.2s 1494*c0909341SAndroid Build Coastguard Worker trn2 v6.2s, v4.2s, v6.2s 1495*c0909341SAndroid Build Coastguard Worker trn1 v4.2s, v5.2s, v7.2s 1496*c0909341SAndroid Build Coastguard Worker trn2 v7.2s, v5.2s, v7.2s 1497*c0909341SAndroid Build Coastguard Worker mul v3.4h, v3.4h, v0.h[0] 1498*c0909341SAndroid Build Coastguard Worker mla v3.4h, v4.4h, v0.h[1] 1499*c0909341SAndroid Build Coastguard Worker mla v3.4h, v6.4h, v0.h[2] 1500*c0909341SAndroid Build Coastguard Worker mla v3.4h, v7.4h, v0.h[3] 1501*c0909341SAndroid Build Coastguard Worker srshr v3.4h, v3.4h, #2 1502*c0909341SAndroid Build Coastguard Worker sqrshrun v3.8b, v3.8h, #4 1503*c0909341SAndroid Build Coastguard Worker st1 {v3.h}[0], [\dst], \d_strd 1504*c0909341SAndroid Build Coastguard Worker st1 {v3.h}[1], [\ds2], \d_strd 1505*c0909341SAndroid Build Coastguard Worker b.gt 2b 1506*c0909341SAndroid Build Coastguard Worker ret 1507*c0909341SAndroid Build Coastguard Worker.endif 1508*c0909341SAndroid Build Coastguard Worker 1509*c0909341SAndroid Build Coastguard Worker40: // 4xN h 1510*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1511*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmx, #2] 1512*c0909341SAndroid Build Coastguard Worker sub \src, \src, #1 1513*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1514*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1515*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1516*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1517*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1518*c0909341SAndroid Build Coastguard Worker4: 1519*c0909341SAndroid Build Coastguard Worker ld1 {v16.8b}, [\src], \s_strd 1520*c0909341SAndroid Build Coastguard Worker ld1 {v20.8b}, [\sr2], \s_strd 1521*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b 1522*c0909341SAndroid Build Coastguard Worker uxtl 
v20.8h, v20.8b 1523*c0909341SAndroid Build Coastguard Worker ext v17.16b, v16.16b, v16.16b, #2 1524*c0909341SAndroid Build Coastguard Worker ext v18.16b, v16.16b, v16.16b, #4 1525*c0909341SAndroid Build Coastguard Worker ext v19.16b, v16.16b, v16.16b, #6 1526*c0909341SAndroid Build Coastguard Worker ext v21.16b, v20.16b, v20.16b, #2 1527*c0909341SAndroid Build Coastguard Worker ext v22.16b, v20.16b, v20.16b, #4 1528*c0909341SAndroid Build Coastguard Worker ext v23.16b, v20.16b, v20.16b, #6 1529*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1530*c0909341SAndroid Build Coastguard Worker mul v16.4h, v16.4h, v0.h[0] 1531*c0909341SAndroid Build Coastguard Worker mla v16.4h, v17.4h, v0.h[1] 1532*c0909341SAndroid Build Coastguard Worker mla v16.4h, v18.4h, v0.h[2] 1533*c0909341SAndroid Build Coastguard Worker mla v16.4h, v19.4h, v0.h[3] 1534*c0909341SAndroid Build Coastguard Worker mul v20.4h, v20.4h, v0.h[0] 1535*c0909341SAndroid Build Coastguard Worker mla v20.4h, v21.4h, v0.h[1] 1536*c0909341SAndroid Build Coastguard Worker mla v20.4h, v22.4h, v0.h[2] 1537*c0909341SAndroid Build Coastguard Worker mla v20.4h, v23.4h, v0.h[3] 1538*c0909341SAndroid Build Coastguard Worker srshr v16.4h, v16.4h, #2 1539*c0909341SAndroid Build Coastguard Worker srshr v20.4h, v20.4h, #2 1540*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1541*c0909341SAndroid Build Coastguard Worker sqrshrun v16.8b, v16.8h, #4 1542*c0909341SAndroid Build Coastguard Worker sqrshrun v20.8b, v20.8h, #4 1543*c0909341SAndroid Build Coastguard Worker str s16, [\dst] 1544*c0909341SAndroid Build Coastguard Worker str s20, [\ds2] 1545*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd 1546*c0909341SAndroid Build Coastguard Worker add \ds2, \ds2, \d_strd 1547*c0909341SAndroid Build Coastguard Worker.else 1548*c0909341SAndroid Build Coastguard Worker st1 {v16.4h}, [\dst], \d_strd 1549*c0909341SAndroid Build Coastguard Worker st1 {v20.4h}, [\ds2], \d_strd 1550*c0909341SAndroid Build 
Coastguard Worker.endif 1551*c0909341SAndroid Build Coastguard Worker b.gt 4b 1552*c0909341SAndroid Build Coastguard Worker ret 1553*c0909341SAndroid Build Coastguard Worker 1554*c0909341SAndroid Build Coastguard Worker80: // 8xN h 1555*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1556*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmx] 1557*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 1558*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 1559*c0909341SAndroid Build Coastguard Worker.else 1560*c0909341SAndroid Build Coastguard Worker sub \src, \src, #3 1561*c0909341SAndroid Build Coastguard Worker.endif 1562*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1563*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1564*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1565*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1566*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1567*c0909341SAndroid Build Coastguard Worker8: 1568*c0909341SAndroid Build Coastguard Worker ld1 {v16.8b, v17.8b}, [\src], \s_strd 1569*c0909341SAndroid Build Coastguard Worker ld1 {v20.8b, v21.8b}, [\sr2], \s_strd 1570*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b 1571*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v17.8b 1572*c0909341SAndroid Build Coastguard Worker uxtl v20.8h, v20.8b 1573*c0909341SAndroid Build Coastguard Worker uxtl v21.8h, v21.8b 1574*c0909341SAndroid Build Coastguard Worker 1575*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 1576*c0909341SAndroid Build Coastguard Worker mul v18.8h, v16.8h, v0.h[1] 1577*c0909341SAndroid Build Coastguard Worker mul v22.8h, v20.8h, v0.h[1] 1578*c0909341SAndroid Build Coastguard Worker .irpc i, 23456 1579*c0909341SAndroid Build Coastguard Worker ext v19.16b, v16.16b, v17.16b, #(2*\i-2) 1580*c0909341SAndroid Build Coastguard Worker ext v23.16b, v20.16b, v21.16b, #(2*\i-2) 
1581*c0909341SAndroid Build Coastguard Worker mla v18.8h, v19.8h, v0.h[\i] 1582*c0909341SAndroid Build Coastguard Worker mla v22.8h, v23.8h, v0.h[\i] 1583*c0909341SAndroid Build Coastguard Worker .endr 1584*c0909341SAndroid Build Coastguard Worker.else // 8tap 1585*c0909341SAndroid Build Coastguard Worker mul v18.8h, v16.8h, v0.h[0] 1586*c0909341SAndroid Build Coastguard Worker mul v22.8h, v20.8h, v0.h[0] 1587*c0909341SAndroid Build Coastguard Worker .irpc i, 1234567 1588*c0909341SAndroid Build Coastguard Worker ext v19.16b, v16.16b, v17.16b, #(2*\i) 1589*c0909341SAndroid Build Coastguard Worker ext v23.16b, v20.16b, v21.16b, #(2*\i) 1590*c0909341SAndroid Build Coastguard Worker mla v18.8h, v19.8h, v0.h[\i] 1591*c0909341SAndroid Build Coastguard Worker mla v22.8h, v23.8h, v0.h[\i] 1592*c0909341SAndroid Build Coastguard Worker .endr 1593*c0909341SAndroid Build Coastguard Worker.endif 1594*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1595*c0909341SAndroid Build Coastguard Worker srshr v18.8h, v18.8h, #2 1596*c0909341SAndroid Build Coastguard Worker srshr v22.8h, v22.8h, #2 1597*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1598*c0909341SAndroid Build Coastguard Worker sqrshrun v18.8b, v18.8h, #4 1599*c0909341SAndroid Build Coastguard Worker sqrshrun v22.8b, v22.8h, #4 1600*c0909341SAndroid Build Coastguard Worker st1 {v18.8b}, [\dst], \d_strd 1601*c0909341SAndroid Build Coastguard Worker st1 {v22.8b}, [\ds2], \d_strd 1602*c0909341SAndroid Build Coastguard Worker.else 1603*c0909341SAndroid Build Coastguard Worker st1 {v18.8h}, [\dst], \d_strd 1604*c0909341SAndroid Build Coastguard Worker st1 {v22.8h}, [\ds2], \d_strd 1605*c0909341SAndroid Build Coastguard Worker.endif 1606*c0909341SAndroid Build Coastguard Worker b.gt 8b 1607*c0909341SAndroid Build Coastguard Worker ret 1608*c0909341SAndroid Build Coastguard Worker160: 1609*c0909341SAndroid Build Coastguard Worker320: 1610*c0909341SAndroid Build Coastguard Worker640: 1611*c0909341SAndroid Build 
Coastguard Worker1280: // 16xN, 32xN, ... h 1612*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1613*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmx] 1614*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 1615*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 1616*c0909341SAndroid Build Coastguard Worker.else 1617*c0909341SAndroid Build Coastguard Worker sub \src, \src, #3 1618*c0909341SAndroid Build Coastguard Worker.endif 1619*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1620*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1621*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1622*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1623*c0909341SAndroid Build Coastguard Worker 1624*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, \w, uxtw 1625*c0909341SAndroid Build Coastguard Worker sub \s_strd, \s_strd, #8 1626*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1627*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1628*c0909341SAndroid Build Coastguard Worker sub \d_strd, \d_strd, \w, uxtw 1629*c0909341SAndroid Build Coastguard Worker.endif 1630*c0909341SAndroid Build Coastguard Worker161: 1631*c0909341SAndroid Build Coastguard Worker ld1 {v16.8b, v17.8b, v18.8b}, [\src], #24 1632*c0909341SAndroid Build Coastguard Worker ld1 {v20.8b, v21.8b, v22.8b}, [\sr2], #24 1633*c0909341SAndroid Build Coastguard Worker mov \mx, \w 1634*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v16.8b 1635*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v17.8b 1636*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v18.8b 1637*c0909341SAndroid Build Coastguard Worker uxtl v20.8h, v20.8b 1638*c0909341SAndroid Build Coastguard Worker uxtl v21.8h, v21.8b 1639*c0909341SAndroid Build Coastguard Worker uxtl v22.8h, v22.8b 1640*c0909341SAndroid Build Coastguard Worker 1641*c0909341SAndroid Build Coastguard Worker16: 
1642*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 1643*c0909341SAndroid Build Coastguard Worker mul v24.8h, v16.8h, v0.h[1] 1644*c0909341SAndroid Build Coastguard Worker mul v25.8h, v17.8h, v0.h[1] 1645*c0909341SAndroid Build Coastguard Worker mul v26.8h, v20.8h, v0.h[1] 1646*c0909341SAndroid Build Coastguard Worker mul v27.8h, v21.8h, v0.h[1] 1647*c0909341SAndroid Build Coastguard Worker .irpc i, 23456 1648*c0909341SAndroid Build Coastguard Worker ext v28.16b, v16.16b, v17.16b, #(2*\i-2) 1649*c0909341SAndroid Build Coastguard Worker ext v29.16b, v17.16b, v18.16b, #(2*\i-2) 1650*c0909341SAndroid Build Coastguard Worker ext v30.16b, v20.16b, v21.16b, #(2*\i-2) 1651*c0909341SAndroid Build Coastguard Worker ext v31.16b, v21.16b, v22.16b, #(2*\i-2) 1652*c0909341SAndroid Build Coastguard Worker mla v24.8h, v28.8h, v0.h[\i] 1653*c0909341SAndroid Build Coastguard Worker mla v25.8h, v29.8h, v0.h[\i] 1654*c0909341SAndroid Build Coastguard Worker mla v26.8h, v30.8h, v0.h[\i] 1655*c0909341SAndroid Build Coastguard Worker mla v27.8h, v31.8h, v0.h[\i] 1656*c0909341SAndroid Build Coastguard Worker .endr 1657*c0909341SAndroid Build Coastguard Worker.else // 8tap 1658*c0909341SAndroid Build Coastguard Worker mul v24.8h, v16.8h, v0.h[0] 1659*c0909341SAndroid Build Coastguard Worker mul v25.8h, v17.8h, v0.h[0] 1660*c0909341SAndroid Build Coastguard Worker mul v26.8h, v20.8h, v0.h[0] 1661*c0909341SAndroid Build Coastguard Worker mul v27.8h, v21.8h, v0.h[0] 1662*c0909341SAndroid Build Coastguard Worker .irpc i, 1234567 1663*c0909341SAndroid Build Coastguard Worker ext v28.16b, v16.16b, v17.16b, #(2*\i) 1664*c0909341SAndroid Build Coastguard Worker ext v29.16b, v17.16b, v18.16b, #(2*\i) 1665*c0909341SAndroid Build Coastguard Worker ext v30.16b, v20.16b, v21.16b, #(2*\i) 1666*c0909341SAndroid Build Coastguard Worker ext v31.16b, v21.16b, v22.16b, #(2*\i) 1667*c0909341SAndroid Build Coastguard Worker mla v24.8h, v28.8h, v0.h[\i] 1668*c0909341SAndroid Build Coastguard Worker 
mla v25.8h, v29.8h, v0.h[\i] 1669*c0909341SAndroid Build Coastguard Worker mla v26.8h, v30.8h, v0.h[\i] 1670*c0909341SAndroid Build Coastguard Worker mla v27.8h, v31.8h, v0.h[\i] 1671*c0909341SAndroid Build Coastguard Worker .endr 1672*c0909341SAndroid Build Coastguard Worker.endif 1673*c0909341SAndroid Build Coastguard Worker srshr v24.8h, v24.8h, #2 1674*c0909341SAndroid Build Coastguard Worker srshr v25.8h, v25.8h, #2 1675*c0909341SAndroid Build Coastguard Worker srshr v26.8h, v26.8h, #2 1676*c0909341SAndroid Build Coastguard Worker srshr v27.8h, v27.8h, #2 1677*c0909341SAndroid Build Coastguard Worker subs \mx, \mx, #16 1678*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1679*c0909341SAndroid Build Coastguard Worker sqrshrun v24.8b, v24.8h, #4 1680*c0909341SAndroid Build Coastguard Worker sqrshrun2 v24.16b, v25.8h, #4 1681*c0909341SAndroid Build Coastguard Worker sqrshrun v26.8b, v26.8h, #4 1682*c0909341SAndroid Build Coastguard Worker sqrshrun2 v26.16b, v27.8h, #4 1683*c0909341SAndroid Build Coastguard Worker st1 {v24.16b}, [\dst], #16 1684*c0909341SAndroid Build Coastguard Worker st1 {v26.16b}, [\ds2], #16 1685*c0909341SAndroid Build Coastguard Worker.else 1686*c0909341SAndroid Build Coastguard Worker st1 {v24.8h, v25.8h}, [\dst], #32 1687*c0909341SAndroid Build Coastguard Worker st1 {v26.8h, v27.8h}, [\ds2], #32 1688*c0909341SAndroid Build Coastguard Worker.endif 1689*c0909341SAndroid Build Coastguard Worker b.le 9f 1690*c0909341SAndroid Build Coastguard Worker 1691*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 1692*c0909341SAndroid Build Coastguard Worker mov v20.16b, v22.16b 1693*c0909341SAndroid Build Coastguard Worker ld1 {v17.8b, v18.8b}, [\src], #16 1694*c0909341SAndroid Build Coastguard Worker ld1 {v21.8b, v22.8b}, [\sr2], #16 1695*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v17.8b 1696*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v18.8b 1697*c0909341SAndroid Build Coastguard Worker uxtl v21.8h, v21.8b 
1698*c0909341SAndroid Build Coastguard Worker uxtl v22.8h, v22.8b 1699*c0909341SAndroid Build Coastguard Worker b 16b 1700*c0909341SAndroid Build Coastguard Worker 1701*c0909341SAndroid Build Coastguard Worker9: 1702*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd 1703*c0909341SAndroid Build Coastguard Worker add \ds2, \ds2, \d_strd 1704*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd 1705*c0909341SAndroid Build Coastguard Worker add \sr2, \sr2, \s_strd 1706*c0909341SAndroid Build Coastguard Worker 1707*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1708*c0909341SAndroid Build Coastguard Worker b.gt 161b 1709*c0909341SAndroid Build Coastguard Worker ret 1710*c0909341SAndroid Build Coastguard Workerendfunc 1711*c0909341SAndroid Build Coastguard Worker 1712*c0909341SAndroid Build Coastguard Workerjumptable \type\()_\taps\()_h_tbl 1713*c0909341SAndroid Build Coastguard Worker .word 1280b - \type\()_\taps\()_h_tbl 1714*c0909341SAndroid Build Coastguard Worker .word 640b - \type\()_\taps\()_h_tbl 1715*c0909341SAndroid Build Coastguard Worker .word 320b - \type\()_\taps\()_h_tbl 1716*c0909341SAndroid Build Coastguard Worker .word 160b - \type\()_\taps\()_h_tbl 1717*c0909341SAndroid Build Coastguard Worker .word 80b - \type\()_\taps\()_h_tbl 1718*c0909341SAndroid Build Coastguard Worker .word 40b - \type\()_\taps\()_h_tbl 1719*c0909341SAndroid Build Coastguard Worker .word 20b - \type\()_\taps\()_h_tbl 1720*c0909341SAndroid Build Coastguard Workerendjumptable 1721*c0909341SAndroid Build Coastguard Worker 1722*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_\taps\()_v) 1723*c0909341SAndroid Build Coastguard Worker cmp \h, #4 1724*c0909341SAndroid Build Coastguard Worker ubfx w9, \my, #7, #7 1725*c0909341SAndroid Build Coastguard Worker and \my, \my, #0x7f 1726*c0909341SAndroid Build Coastguard Worker b.le 4f 1727*c0909341SAndroid Build Coastguard Worker mov \my, w9 1728*c0909341SAndroid Build Coastguard Worker4: 
1729*c0909341SAndroid Build Coastguard Worker add \xmy, x10, \my, uxtw #3 1730*c0909341SAndroid Build Coastguard Worker 1731*c0909341SAndroid Build Coastguard Worker movrel x9, \type\()_\taps\()_v_tbl 1732*c0909341SAndroid Build Coastguard Worker ldrsw x8, [x9, x8, lsl #2] 1733*c0909341SAndroid Build Coastguard Worker add x9, x9, x8 1734*c0909341SAndroid Build Coastguard Worker br x9 1735*c0909341SAndroid Build Coastguard Worker 1736*c0909341SAndroid Build Coastguard Worker20: // 2xN v 1737*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1738*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1739*c0909341SAndroid Build Coastguard Worker b.gt 28f 1740*c0909341SAndroid Build Coastguard Worker 1741*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1742*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 1743*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1744*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1745*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1746*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1747*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1748*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1749*c0909341SAndroid Build Coastguard Worker 1750*c0909341SAndroid Build Coastguard Worker // 2x2 v 1751*c0909341SAndroid Build Coastguard Worker load_h \src, \sr2, \s_strd, v1, v2, v3, v4, v5 1752*c0909341SAndroid Build Coastguard Worker interleave_1_h v1, v2, v3, v4, v5 1753*c0909341SAndroid Build Coastguard Worker b.gt 24f 1754*c0909341SAndroid Build Coastguard Worker uxtl_b v1, v2, v3, v4 1755*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v6, v1, v2, v3, v4, .4h 1756*c0909341SAndroid Build Coastguard Worker sqrshrun_b 6, v6 1757*c0909341SAndroid Build Coastguard Worker st_h \d_strd, v6, 2 1758*c0909341SAndroid Build Coastguard Worker ret 1759*c0909341SAndroid Build Coastguard Worker 1760*c0909341SAndroid Build 
Coastguard Worker24: // 2x4 v 1761*c0909341SAndroid Build Coastguard Worker load_h \sr2, \src, \s_strd, v6, v7 1762*c0909341SAndroid Build Coastguard Worker interleave_1_h v5, v6, v7 1763*c0909341SAndroid Build Coastguard Worker interleave_2_s v1, v2, v3, v4, v5, v6 1764*c0909341SAndroid Build Coastguard Worker uxtl_b v1, v2, v3, v4 1765*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v6, v1, v2, v3, v4, .8h 1766*c0909341SAndroid Build Coastguard Worker sqrshrun_b 6, v6 1767*c0909341SAndroid Build Coastguard Worker st_h \d_strd, v6, 4 1768*c0909341SAndroid Build Coastguard Worker ret 1769*c0909341SAndroid Build Coastguard Worker 1770*c0909341SAndroid Build Coastguard Worker28: // 2x6, 2x8, 2x12, 2x16 v 1771*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmy] 1772*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 1773*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1774*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 1775*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1776*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1777*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1778*c0909341SAndroid Build Coastguard Worker 1779*c0909341SAndroid Build Coastguard Worker load_h \src, \sr2, \s_strd, v1, v2, v3, v4, v5, v6, v7 1780*c0909341SAndroid Build Coastguard Worker interleave_1_h v1, v2, v3, v4, v5 1781*c0909341SAndroid Build Coastguard Worker interleave_1_h v5, v6, v7 1782*c0909341SAndroid Build Coastguard Worker interleave_2_s v1, v2, v3, v4, v5, v6 1783*c0909341SAndroid Build Coastguard Worker uxtl_b v1, v2, v3, v4 1784*c0909341SAndroid Build Coastguard Worker216: 1785*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1786*c0909341SAndroid Build Coastguard Worker load_h \sr2, \src, \s_strd, v16, v17, v18, v19 1787*c0909341SAndroid Build Coastguard Worker interleave_1_h v7, v16, v17, v18, v19 1788*c0909341SAndroid Build Coastguard Worker 
interleave_2_s v5, v6, v7, v16, v17, v18 1789*c0909341SAndroid Build Coastguard Worker uxtl_b v5, v6, v7, v16 1790*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_0 v30, v1, v2, v3, v4, v5, v6, v7, v16 1791*c0909341SAndroid Build Coastguard Worker sqrshrun_b 6, v30 1792*c0909341SAndroid Build Coastguard Worker st_h \d_strd, v30, 4 1793*c0909341SAndroid Build Coastguard Worker b.le 0f 1794*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1795*c0909341SAndroid Build Coastguard Worker mov v1.16b, v5.16b 1796*c0909341SAndroid Build Coastguard Worker mov v2.16b, v6.16b 1797*c0909341SAndroid Build Coastguard Worker mov v3.16b, v7.16b 1798*c0909341SAndroid Build Coastguard Worker mov v4.16b, v16.16b 1799*c0909341SAndroid Build Coastguard Worker mov v5.16b, v17.16b 1800*c0909341SAndroid Build Coastguard Worker mov v6.16b, v18.16b 1801*c0909341SAndroid Build Coastguard Worker mov v7.16b, v19.16b 1802*c0909341SAndroid Build Coastguard Worker b.eq 26f 1803*c0909341SAndroid Build Coastguard Worker b 216b 1804*c0909341SAndroid Build Coastguard Worker26: 1805*c0909341SAndroid Build Coastguard Worker load_h \sr2, \src, \s_strd, v16, v17 1806*c0909341SAndroid Build Coastguard Worker interleave_1_h v7, v16, v17 1807*c0909341SAndroid Build Coastguard Worker uxtl_b v5, v6, v7, v16 1808*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_0_4h v30, v1, v2, v3, v4, v5, v6, v7, v16 1809*c0909341SAndroid Build Coastguard Worker sqrshrun_b 6, v30 1810*c0909341SAndroid Build Coastguard Worker st_h \d_strd, v30, 2 1811*c0909341SAndroid Build Coastguard Worker0: 1812*c0909341SAndroid Build Coastguard Worker ret 1813*c0909341SAndroid Build Coastguard Worker.endif 1814*c0909341SAndroid Build Coastguard Worker 1815*c0909341SAndroid Build Coastguard Worker40: 1816*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1817*c0909341SAndroid Build Coastguard Worker b.gt 480f 1818*c0909341SAndroid Build Coastguard Worker 1819*c0909341SAndroid Build Coastguard Worker // 
4x2, 4x4 v 1820*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1821*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 1822*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1823*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1824*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1825*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1826*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1827*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1828*c0909341SAndroid Build Coastguard Worker 1829*c0909341SAndroid Build Coastguard Worker load_s \src, \sr2, \s_strd, v1, v2, v3, v4, v5 1830*c0909341SAndroid Build Coastguard Worker interleave_1_s v1, v2, v3, v4, v5 1831*c0909341SAndroid Build Coastguard Worker uxtl_b v1, v2, v3, v4 1832*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v6, v1, v2, v3, v4, .8h 1833*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v6 1834*c0909341SAndroid Build Coastguard Worker b.le 0f 1835*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v6, v7 1836*c0909341SAndroid Build Coastguard Worker interleave_1_s v5, v6, v7 1837*c0909341SAndroid Build Coastguard Worker uxtl_b v5, v6 1838*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v7, v3, v4, v5, v6, .8h 1839*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v7 1840*c0909341SAndroid Build Coastguard Worker0: 1841*c0909341SAndroid Build Coastguard Worker ret 1842*c0909341SAndroid Build Coastguard Worker 1843*c0909341SAndroid Build Coastguard Worker480: // 4x6, 4x8, 4x12, 4x16 v 1844*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmy] 1845*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 1846*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1847*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 1848*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, 
#1 1849*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1850*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1851*c0909341SAndroid Build Coastguard Worker 1852*c0909341SAndroid Build Coastguard Worker load_s \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22 1853*c0909341SAndroid Build Coastguard Worker interleave_1_s v16, v17, v18 1854*c0909341SAndroid Build Coastguard Worker interleave_1_s v18, v19, v20, v21, v22 1855*c0909341SAndroid Build Coastguard Worker uxtl_b v16, v17 1856*c0909341SAndroid Build Coastguard Worker uxtl_b v18, v19, v20, v21 1857*c0909341SAndroid Build Coastguard Worker 1858*c0909341SAndroid Build Coastguard Worker48: 1859*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1860*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v23, v24, v25, v26 1861*c0909341SAndroid Build Coastguard Worker interleave_1_s v22, v23, v24, v25, v26 1862*c0909341SAndroid Build Coastguard Worker uxtl_b v22, v23, v24, v25 1863*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_2 v1, v2, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25 1864*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v1, v2 1865*c0909341SAndroid Build Coastguard Worker b.le 0f 1866*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v27, v16 1867*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1868*c0909341SAndroid Build Coastguard Worker interleave_1_s v26, v27, v16 1869*c0909341SAndroid Build Coastguard Worker uxtl_b v26, v27 1870*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_0 v1, v20, v21, v22, v23, v24, v25, v26, v27 1871*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v1 1872*c0909341SAndroid Build Coastguard Worker b.le 0f 1873*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v17, v18 1874*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1875*c0909341SAndroid Build Coastguard Worker interleave_1_s v16, v17, v18 
1876*c0909341SAndroid Build Coastguard Worker uxtl_b v16, v17 1877*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_0 v2, v22, v23, v24, v25, v26, v27, v16, v17 1878*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v2 1879*c0909341SAndroid Build Coastguard Worker b.le 0f 1880*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1881*c0909341SAndroid Build Coastguard Worker load_s \sr2, \src, \s_strd, v19, v20, v21, v22 1882*c0909341SAndroid Build Coastguard Worker interleave_1_s v18, v19, v20, v21, v22 1883*c0909341SAndroid Build Coastguard Worker uxtl_b v18, v19, v20, v21 1884*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_2 v1, v2, v24, v25, v26, v27, v16, v17, v18, v19, v20, v21 1885*c0909341SAndroid Build Coastguard Worker shift_store_4 \type, \d_strd, v1, v2 1886*c0909341SAndroid Build Coastguard Worker b.gt 48b 1887*c0909341SAndroid Build Coastguard Worker0: 1888*c0909341SAndroid Build Coastguard Worker ret 1889*c0909341SAndroid Build Coastguard Worker 1890*c0909341SAndroid Build Coastguard Worker80: 1891*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1892*c0909341SAndroid Build Coastguard Worker b.gt 880f 1893*c0909341SAndroid Build Coastguard Worker 1894*c0909341SAndroid Build Coastguard Worker // 8x2, 8x4 v 1895*c0909341SAndroid Build Coastguard Worker cmp \h, #2 1896*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 1897*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1898*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1899*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1900*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1901*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1902*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1903*c0909341SAndroid Build Coastguard Worker 1904*c0909341SAndroid Build Coastguard Worker load_8b \src, \sr2, \s_strd, v1, v2, v3, v4, v5 
1905*c0909341SAndroid Build Coastguard Worker uxtl_b v1, v2, v3, v4, v5 1906*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v6, v1, v2, v3, v4, .8h 1907*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v7, v2, v3, v4, v5, .8h 1908*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v6, v7 1909*c0909341SAndroid Build Coastguard Worker b.le 0f 1910*c0909341SAndroid Build Coastguard Worker load_8b \sr2, \src, \s_strd, v6, v7 1911*c0909341SAndroid Build Coastguard Worker uxtl_b v6, v7 1912*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v1, v3, v4, v5, v6, .8h 1913*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v2, v4, v5, v6, v7, .8h 1914*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v1, v2 1915*c0909341SAndroid Build Coastguard Worker0: 1916*c0909341SAndroid Build Coastguard Worker ret 1917*c0909341SAndroid Build Coastguard Worker 1918*c0909341SAndroid Build Coastguard Worker880: // 8x6, 8x8, 8x16, 8x32 v 1919*c0909341SAndroid Build Coastguard Worker1680: // 16x8, 16x16, ... 1920*c0909341SAndroid Build Coastguard Worker320: // 32x8, 32x16, ... 
1921*c0909341SAndroid Build Coastguard Worker640: 1922*c0909341SAndroid Build Coastguard Worker1280: 1923*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1924*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmy] 1925*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1926*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 1927*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 1928*c0909341SAndroid Build Coastguard Worker mov \my, \h 1929*c0909341SAndroid Build Coastguard Worker168: 1930*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1931*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1932*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1933*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 1934*c0909341SAndroid Build Coastguard Worker 1935*c0909341SAndroid Build Coastguard Worker load_8b \src, \sr2, \s_strd, v16, v17, v18, v19, v20, v21, v22 1936*c0909341SAndroid Build Coastguard Worker uxtl_b v16, v17, v18, v19, v20, v21, v22 1937*c0909341SAndroid Build Coastguard Worker 1938*c0909341SAndroid Build Coastguard Worker88: 1939*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1940*c0909341SAndroid Build Coastguard Worker load_8b \sr2, \src, \s_strd, v23, v24 1941*c0909341SAndroid Build Coastguard Worker uxtl_b v23, v24 1942*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_1 v1, v2, v16, v17, v18, v19, v20, v21, v22, v23, v24 1943*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v1, v2 1944*c0909341SAndroid Build Coastguard Worker b.le 9f 1945*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1946*c0909341SAndroid Build Coastguard Worker load_8b \sr2, \src, \s_strd, v25, v26 1947*c0909341SAndroid Build Coastguard Worker uxtl_b v25, v26 1948*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_1 v3, v4, v18, v19, v20, v21, v22, v23, v24, v25, v26 1949*c0909341SAndroid Build 
Coastguard Worker shift_store_8 \type, \d_strd, v3, v4 1950*c0909341SAndroid Build Coastguard Worker b.le 9f 1951*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1952*c0909341SAndroid Build Coastguard Worker load_8b \sr2, \src, \s_strd, v27, v16 1953*c0909341SAndroid Build Coastguard Worker uxtl_b v27, v16 1954*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_1 v1, v2, v20, v21, v22, v23, v24, v25, v26, v27, v16 1955*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v1, v2 1956*c0909341SAndroid Build Coastguard Worker b.le 9f 1957*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 1958*c0909341SAndroid Build Coastguard Worker load_8b \sr2, \src, \s_strd, v17, v18 1959*c0909341SAndroid Build Coastguard Worker uxtl_b v17, v18 1960*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_1 v3, v4, v22, v23, v24, v25, v26, v27, v16, v17, v18 1961*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v3, v4 1962*c0909341SAndroid Build Coastguard Worker b.le 9f 1963*c0909341SAndroid Build Coastguard Worker subs \h, \h, #4 1964*c0909341SAndroid Build Coastguard Worker load_8b \sr2, \src, \s_strd, v19, v20, v21, v22 1965*c0909341SAndroid Build Coastguard Worker uxtl_b v19, v20, v21, v22 1966*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_1 v1, v2, v24, v25, v26, v27, v16, v17, v18, v19, v20 1967*c0909341SAndroid Build Coastguard Worker mul_mla_\taps\()_1 v3, v4, v26, v27, v16, v17, v18, v19, v20, v21, v22 1968*c0909341SAndroid Build Coastguard Worker shift_store_8 \type, \d_strd, v1, v2, v3, v4 1969*c0909341SAndroid Build Coastguard Worker b.gt 88b 1970*c0909341SAndroid Build Coastguard Worker9: 1971*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 1972*c0909341SAndroid Build Coastguard Worker b.le 0f 1973*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 1974*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 1975*c0909341SAndroid Build Coastguard Worker msub 
\src, \s_strd, \xmy, \src 1976*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 1977*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #3 1978*c0909341SAndroid Build Coastguard Worker mov \h, \my 1979*c0909341SAndroid Build Coastguard Worker add \src, \src, #8 1980*c0909341SAndroid Build Coastguard Worker.ifc \type, put 1981*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #8 1982*c0909341SAndroid Build Coastguard Worker.else 1983*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 1984*c0909341SAndroid Build Coastguard Worker.endif 1985*c0909341SAndroid Build Coastguard Worker b 168b 1986*c0909341SAndroid Build Coastguard Worker0: 1987*c0909341SAndroid Build Coastguard Worker ret 1988*c0909341SAndroid Build Coastguard Worker 1989*c0909341SAndroid Build Coastguard Worker160: 1990*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 1991*c0909341SAndroid Build Coastguard Worker b.gt 1680b 1992*c0909341SAndroid Build Coastguard Worker 1993*c0909341SAndroid Build Coastguard Worker // 16x2, 16x4 v 1994*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmy, #2] 1995*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 1996*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 1997*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 1998*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 1999*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2000*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2001*c0909341SAndroid Build Coastguard Worker 2002*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2003*c0909341SAndroid Build Coastguard Worker load_16b \src, \sr2, \s_strd, v1, v2, v3, v4, v5 2004*c0909341SAndroid Build Coastguard Worker uxtl v16.8h, v1.8b 2005*c0909341SAndroid Build Coastguard Worker uxtl v17.8h, v2.8b 2006*c0909341SAndroid Build Coastguard Worker uxtl v18.8h, v3.8b 2007*c0909341SAndroid Build Coastguard 
Worker uxtl v19.8h, v4.8b 2008*c0909341SAndroid Build Coastguard Worker uxtl v20.8h, v5.8b 2009*c0909341SAndroid Build Coastguard Worker uxtl2 v23.8h, v1.16b 2010*c0909341SAndroid Build Coastguard Worker uxtl2 v24.8h, v2.16b 2011*c0909341SAndroid Build Coastguard Worker uxtl2 v25.8h, v3.16b 2012*c0909341SAndroid Build Coastguard Worker uxtl2 v26.8h, v4.16b 2013*c0909341SAndroid Build Coastguard Worker uxtl2 v27.8h, v5.16b 2014*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v1, v16, v17, v18, v19, .8h 2015*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v16, v17, v18, v19, v20, .8h 2016*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v2, v23, v24, v25, v26, .8h 2017*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v17, v24, v25, v26, v27, .8h 2018*c0909341SAndroid Build Coastguard Worker shift_store_16 \type, \d_strd, v1, v2, v16, v17 2019*c0909341SAndroid Build Coastguard Worker b.le 0f 2020*c0909341SAndroid Build Coastguard Worker load_16b \sr2, \src, \s_strd, v6, v7 2021*c0909341SAndroid Build Coastguard Worker uxtl v21.8h, v6.8b 2022*c0909341SAndroid Build Coastguard Worker uxtl v22.8h, v7.8b 2023*c0909341SAndroid Build Coastguard Worker uxtl2 v28.8h, v6.16b 2024*c0909341SAndroid Build Coastguard Worker uxtl2 v29.8h, v7.16b 2025*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v1, v18, v19, v20, v21, .8h 2026*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v3, v19, v20, v21, v22, .8h 2027*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v2, v25, v26, v27, v28, .8h 2028*c0909341SAndroid Build Coastguard Worker mul_mla_4tap v4, v26, v27, v28, v29, .8h 2029*c0909341SAndroid Build Coastguard Worker shift_store_16 \type, \d_strd, v1, v2, v3, v4 2030*c0909341SAndroid Build Coastguard Worker0: 2031*c0909341SAndroid Build Coastguard Worker ret 2032*c0909341SAndroid Build Coastguard Workerendfunc 2033*c0909341SAndroid Build Coastguard Worker 2034*c0909341SAndroid Build Coastguard Workerjumptable \type\()_\taps\()_v_tbl
/*
 * Jump table for the vertical-only (v) filter paths. The "jumptable" statement
 * directly above opens the table and the ".word" values directly below are its
 * entries. Each entry is the distance from the table base to one of the
 * numeric labels 1280, 640, 320, 160, 80, 40, 20. The "b" suffix makes every
 * one a backward reference, i.e. the nearest earlier definition of that label,
 * which lies in the function closed by the endfunc just before the table.
 * Entries run from the largest label down to the smallest, so their order has
 * to stay in step with whatever index the dispatcher computes.
 *
 * The labels look like block width times ten (for example, label 160 heads the
 * path commented "16x2, 16x4 v") -- TODO confirm for the labels defined earlier
 * in the file.
 *
 * NOTE(review): the dispatcher that reads this table is not visible in this
 * part of the file. The hv dispatcher further down reads its own table with
 * "ldrsw x8, [x9, x8, lsl #2]", adds the loaded value to the table base and
 * branches there; presumably this table is consumed the same way (signed
 * 32-bit entries indexed by x8). Confirm before reordering or resizing entries.
 */
2035*c0909341SAndroid Build Coastguard Worker .word 1280b - \type\()_\taps\()_v_tbl 2036*c0909341SAndroid Build Coastguard Worker .word 640b - \type\()_\taps\()_v_tbl 2037*c0909341SAndroid Build Coastguard Worker .word 320b - \type\()_\taps\()_v_tbl 2038*c0909341SAndroid Build Coastguard Worker .word 160b - \type\()_\taps\()_v_tbl 2039*c0909341SAndroid Build Coastguard Worker .word 80b - \type\()_\taps\()_v_tbl 2040*c0909341SAndroid Build Coastguard Worker .word 40b - \type\()_\taps\()_v_tbl 2041*c0909341SAndroid Build Coastguard Worker .word 20b - \type\()_\taps\()_v_tbl 2042*c0909341SAndroid Build Coastguard Workerendjumptable 2043*c0909341SAndroid Build Coastguard Worker 2044*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_\taps\()_hv) 2045*c0909341SAndroid Build Coastguard Worker cmp \h, #4 2046*c0909341SAndroid Build Coastguard Worker ubfx w9, \my, #7, #7 2047*c0909341SAndroid Build Coastguard Worker and \my, \my, #0x7f 2048*c0909341SAndroid Build Coastguard Worker b.le 4f 2049*c0909341SAndroid Build Coastguard Worker mov \my, w9 2050*c0909341SAndroid Build Coastguard Worker4: 2051*c0909341SAndroid Build Coastguard Worker add \xmy, x10, \my, uxtw #3 2052*c0909341SAndroid Build Coastguard Worker 2053*c0909341SAndroid Build Coastguard Worker movrel x9, \type\()_\taps\()_hv_tbl 2054*c0909341SAndroid Build Coastguard Worker ldrsw x8, [x9, x8, lsl #2] 2055*c0909341SAndroid Build Coastguard Worker add x9, x9, x8 2056*c0909341SAndroid Build Coastguard Worker br x9 2057*c0909341SAndroid Build Coastguard Worker 2058*c0909341SAndroid Build Coastguard Worker20: 2059*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2060*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2061*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmx, #2] 2062*c0909341SAndroid Build Coastguard Worker b.gt 280f 2063*c0909341SAndroid Build Coastguard Worker ldur s1, [\xmy, #2] 2064*c0909341SAndroid Build Coastguard Worker 2065*c0909341SAndroid Build Coastguard
Worker // 2x2, 2x4 hv 2066*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, #1 2067*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 2068*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2069*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2070*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2071*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2072*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2073*c0909341SAndroid Build Coastguard Worker mov x15, x30 2074*c0909341SAndroid Build Coastguard Worker 2075*c0909341SAndroid Build Coastguard Worker ld1 {v28.8b}, [\src], \s_strd 2076*c0909341SAndroid Build Coastguard Worker uxtl v28.8h, v28.8b 2077*c0909341SAndroid Build Coastguard Worker ext v29.16b, v28.16b, v28.16b, #2 2078*c0909341SAndroid Build Coastguard Worker mul v28.4h, v28.4h, v0.4h 2079*c0909341SAndroid Build Coastguard Worker mul v29.4h, v29.4h, v0.4h 2080*c0909341SAndroid Build Coastguard Worker addp v28.4h, v28.4h, v29.4h 2081*c0909341SAndroid Build Coastguard Worker addp v16.4h, v28.4h, v28.4h 2082*c0909341SAndroid Build Coastguard Worker srshr v16.4h, v16.4h, #2 2083*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2084*c0909341SAndroid Build Coastguard Worker 2085*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v28.2s 2086*c0909341SAndroid Build Coastguard Worker mov v17.8b, v28.8b 2087*c0909341SAndroid Build Coastguard Worker 2088*c0909341SAndroid Build Coastguard Worker2: 2089*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2090*c0909341SAndroid Build Coastguard Worker 2091*c0909341SAndroid Build Coastguard Worker ext v18.8b, v17.8b, v28.8b, #4 2092*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2093*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2094*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2095*c0909341SAndroid Build 
Coastguard Worker smlal v2.4s, v28.4h, v1.h[3] 2096*c0909341SAndroid Build Coastguard Worker 2097*c0909341SAndroid Build Coastguard Worker sqrshrn v2.4h, v2.4s, #\shift_hv 2098*c0909341SAndroid Build Coastguard Worker sqxtun v2.8b, v2.8h 2099*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2100*c0909341SAndroid Build Coastguard Worker st1 {v2.h}[0], [\dst], \d_strd 2101*c0909341SAndroid Build Coastguard Worker st1 {v2.h}[1], [\ds2], \d_strd 2102*c0909341SAndroid Build Coastguard Worker b.le 0f 2103*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2104*c0909341SAndroid Build Coastguard Worker mov v17.8b, v28.8b 2105*c0909341SAndroid Build Coastguard Worker b 2b 2106*c0909341SAndroid Build Coastguard Worker 2107*c0909341SAndroid Build Coastguard Worker280: // 2x8, 2x16, 2x32 hv 2108*c0909341SAndroid Build Coastguard Worker ld1 {v1.8b}, [\xmy] 2109*c0909341SAndroid Build Coastguard Worker sub \src, \src, #1 2110*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 2111*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 2112*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2113*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2114*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2115*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2116*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2117*c0909341SAndroid Build Coastguard Worker mov x15, x30 2118*c0909341SAndroid Build Coastguard Worker 2119*c0909341SAndroid Build Coastguard Worker ld1 {v28.8b}, [\src], \s_strd 2120*c0909341SAndroid Build Coastguard Worker uxtl v28.8h, v28.8b 2121*c0909341SAndroid Build Coastguard Worker ext v29.16b, v28.16b, v28.16b, #2 2122*c0909341SAndroid Build Coastguard Worker mul v28.4h, v28.4h, v0.4h 2123*c0909341SAndroid Build Coastguard Worker mul v29.4h, v29.4h, v0.4h 2124*c0909341SAndroid Build Coastguard Worker addp v28.4h, v28.4h, v29.4h 2125*c0909341SAndroid Build 
Coastguard Worker addp v16.4h, v28.4h, v28.4h 2126*c0909341SAndroid Build Coastguard Worker srshr v16.4h, v16.4h, #2 2127*c0909341SAndroid Build Coastguard Worker 2128*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2129*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v28.2s 2130*c0909341SAndroid Build Coastguard Worker mov v17.8b, v28.8b 2131*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2132*c0909341SAndroid Build Coastguard Worker ext v18.8b, v17.8b, v28.8b, #4 2133*c0909341SAndroid Build Coastguard Worker mov v19.8b, v28.8b 2134*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2135*c0909341SAndroid Build Coastguard Worker ext v20.8b, v19.8b, v28.8b, #4 2136*c0909341SAndroid Build Coastguard Worker mov v21.8b, v28.8b 2137*c0909341SAndroid Build Coastguard Worker 2138*c0909341SAndroid Build Coastguard Worker28: 2139*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_2) 2140*c0909341SAndroid Build Coastguard Worker ext v22.8b, v21.8b, v28.8b, #4 2141*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2142*c0909341SAndroid Build Coastguard Worker smull v2.4s, v17.4h, v1.h[1] 2143*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2144*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[3] 2145*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v20.4h, v1.h[4] 2146*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[5] 2147*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v22.4h, v1.h[6] 2148*c0909341SAndroid Build Coastguard Worker.else // 8tap 2149*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2150*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2151*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2152*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[3] 2153*c0909341SAndroid Build Coastguard Worker smlal 
v2.4s, v20.4h, v1.h[4] 2154*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[5] 2155*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v22.4h, v1.h[6] 2156*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v28.4h, v1.h[7] 2157*c0909341SAndroid Build Coastguard Worker.endif 2158*c0909341SAndroid Build Coastguard Worker 2159*c0909341SAndroid Build Coastguard Worker sqrshrn v2.4h, v2.4s, #\shift_hv 2160*c0909341SAndroid Build Coastguard Worker sqxtun v2.8b, v2.8h 2161*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2162*c0909341SAndroid Build Coastguard Worker st1 {v2.h}[0], [\dst], \d_strd 2163*c0909341SAndroid Build Coastguard Worker st1 {v2.h}[1], [\ds2], \d_strd 2164*c0909341SAndroid Build Coastguard Worker b.le 0f 2165*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2166*c0909341SAndroid Build Coastguard Worker mov v17.8b, v19.8b 2167*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2168*c0909341SAndroid Build Coastguard Worker mov v19.8b, v21.8b 2169*c0909341SAndroid Build Coastguard Worker mov v20.8b, v22.8b 2170*c0909341SAndroid Build Coastguard Worker mov v21.8b, v28.8b 2171*c0909341SAndroid Build Coastguard Worker b 28b 2172*c0909341SAndroid Build Coastguard Worker 2173*c0909341SAndroid Build Coastguard Worker0: 2174*c0909341SAndroid Build Coastguard Worker ret x15 2175*c0909341SAndroid Build Coastguard Worker 2176*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_2): 2177*c0909341SAndroid Build Coastguard Worker ld1 {v28.8b}, [\sr2], \s_strd 2178*c0909341SAndroid Build Coastguard Worker ld1 {v30.8b}, [\src], \s_strd 2179*c0909341SAndroid Build Coastguard Worker uxtl v28.8h, v28.8b 2180*c0909341SAndroid Build Coastguard Worker uxtl v30.8h, v30.8b 2181*c0909341SAndroid Build Coastguard Worker ext v29.16b, v28.16b, v28.16b, #2 2182*c0909341SAndroid Build Coastguard Worker ext v31.16b, v30.16b, v30.16b, #2 2183*c0909341SAndroid Build Coastguard Worker trn1 v27.2s, v28.2s, v30.2s 
2184*c0909341SAndroid Build Coastguard Worker trn2 v30.2s, v28.2s, v30.2s 2185*c0909341SAndroid Build Coastguard Worker trn1 v28.2s, v29.2s, v31.2s 2186*c0909341SAndroid Build Coastguard Worker trn2 v31.2s, v29.2s, v31.2s 2187*c0909341SAndroid Build Coastguard Worker mul v27.4h, v27.4h, v0.h[0] 2188*c0909341SAndroid Build Coastguard Worker mla v27.4h, v28.4h, v0.h[1] 2189*c0909341SAndroid Build Coastguard Worker mla v27.4h, v30.4h, v0.h[2] 2190*c0909341SAndroid Build Coastguard Worker mla v27.4h, v31.4h, v0.h[3] 2191*c0909341SAndroid Build Coastguard Worker srshr v28.4h, v27.4h, #2 2192*c0909341SAndroid Build Coastguard Worker ret 2193*c0909341SAndroid Build Coastguard Worker.endif 2194*c0909341SAndroid Build Coastguard Worker 2195*c0909341SAndroid Build Coastguard Worker40: 2196*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2197*c0909341SAndroid Build Coastguard Worker ldur s0, [\xmx, #2] 2198*c0909341SAndroid Build Coastguard Worker b.gt 480f 2199*c0909341SAndroid Build Coastguard Worker ldur s1, [\xmy, #2] 2200*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, #1 2201*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 2202*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2203*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2204*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2205*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2206*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2207*c0909341SAndroid Build Coastguard Worker mov x15, x30 2208*c0909341SAndroid Build Coastguard Worker 2209*c0909341SAndroid Build Coastguard Worker // 4x2, 4x4 hv 2210*c0909341SAndroid Build Coastguard Worker ld1 {v26.8b}, [\src], \s_strd 2211*c0909341SAndroid Build Coastguard Worker uxtl v26.8h, v26.8b 2212*c0909341SAndroid Build Coastguard Worker ext v28.16b, v26.16b, v26.16b, #2 2213*c0909341SAndroid Build Coastguard Worker ext v29.16b, v26.16b, v26.16b, #4 
2214*c0909341SAndroid Build Coastguard Worker ext v30.16b, v26.16b, v26.16b, #6 2215*c0909341SAndroid Build Coastguard Worker mul v31.4h, v26.4h, v0.h[0] 2216*c0909341SAndroid Build Coastguard Worker mla v31.4h, v28.4h, v0.h[1] 2217*c0909341SAndroid Build Coastguard Worker mla v31.4h, v29.4h, v0.h[2] 2218*c0909341SAndroid Build Coastguard Worker mla v31.4h, v30.4h, v0.h[3] 2219*c0909341SAndroid Build Coastguard Worker srshr v16.4h, v31.4h, #2 2220*c0909341SAndroid Build Coastguard Worker 2221*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2222*c0909341SAndroid Build Coastguard Worker mov v17.8b, v28.8b 2223*c0909341SAndroid Build Coastguard Worker mov v18.8b, v29.8b 2224*c0909341SAndroid Build Coastguard Worker 2225*c0909341SAndroid Build Coastguard Worker4: 2226*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2227*c0909341SAndroid Build Coastguard Worker // Interleaving the mul/mla chains actually hurts performance 2228*c0909341SAndroid Build Coastguard Worker // significantly on Cortex A53, thus keeping mul/mla tightly 2229*c0909341SAndroid Build Coastguard Worker // chained like this. 
2230*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2231*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2232*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2233*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v28.4h, v1.h[3] 2234*c0909341SAndroid Build Coastguard Worker smull v3.4s, v17.4h, v1.h[0] 2235*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v18.4h, v1.h[1] 2236*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v28.4h, v1.h[2] 2237*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v29.4h, v1.h[3] 2238*c0909341SAndroid Build Coastguard Worker sqrshrn v2.4h, v2.4s, #\shift_hv 2239*c0909341SAndroid Build Coastguard Worker sqrshrn v3.4h, v3.4s, #\shift_hv 2240*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2241*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2242*c0909341SAndroid Build Coastguard Worker sqxtun v2.8b, v2.8h 2243*c0909341SAndroid Build Coastguard Worker sqxtun v3.8b, v3.8h 2244*c0909341SAndroid Build Coastguard Worker str s2, [\dst] 2245*c0909341SAndroid Build Coastguard Worker str s3, [\ds2] 2246*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd 2247*c0909341SAndroid Build Coastguard Worker add \ds2, \ds2, \d_strd 2248*c0909341SAndroid Build Coastguard Worker.else 2249*c0909341SAndroid Build Coastguard Worker st1 {v2.4h}, [\dst], \d_strd 2250*c0909341SAndroid Build Coastguard Worker st1 {v3.4h}, [\ds2], \d_strd 2251*c0909341SAndroid Build Coastguard Worker.endif 2252*c0909341SAndroid Build Coastguard Worker b.le 0f 2253*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2254*c0909341SAndroid Build Coastguard Worker mov v17.8b, v28.8b 2255*c0909341SAndroid Build Coastguard Worker mov v18.8b, v29.8b 2256*c0909341SAndroid Build Coastguard Worker b 4b 2257*c0909341SAndroid Build Coastguard Worker 2258*c0909341SAndroid Build Coastguard Worker480: // 4x8, 4x16, 4x32 hv 2259*c0909341SAndroid Build Coastguard Worker ld1 
{v1.8b}, [\xmy] 2260*c0909341SAndroid Build Coastguard Worker sub \src, \src, #1 2261*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2262*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd 2263*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2264*c0909341SAndroid Build Coastguard Worker.else 2265*c0909341SAndroid Build Coastguard Worker sub \sr2, \src, \s_strd, lsl #1 2266*c0909341SAndroid Build Coastguard Worker sub \src, \sr2, \s_strd 2267*c0909341SAndroid Build Coastguard Worker.endif 2268*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2269*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2270*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2271*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2272*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2273*c0909341SAndroid Build Coastguard Worker mov x15, x30 2274*c0909341SAndroid Build Coastguard Worker 2275*c0909341SAndroid Build Coastguard Worker ld1 {v26.8b}, [\src], \s_strd 2276*c0909341SAndroid Build Coastguard Worker uxtl v26.8h, v26.8b 2277*c0909341SAndroid Build Coastguard Worker ext v28.16b, v26.16b, v26.16b, #2 2278*c0909341SAndroid Build Coastguard Worker ext v29.16b, v26.16b, v26.16b, #4 2279*c0909341SAndroid Build Coastguard Worker ext v30.16b, v26.16b, v26.16b, #6 2280*c0909341SAndroid Build Coastguard Worker mul v31.4h, v26.4h, v0.h[0] 2281*c0909341SAndroid Build Coastguard Worker mla v31.4h, v28.4h, v0.h[1] 2282*c0909341SAndroid Build Coastguard Worker mla v31.4h, v29.4h, v0.h[2] 2283*c0909341SAndroid Build Coastguard Worker mla v31.4h, v30.4h, v0.h[3] 2284*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2285*c0909341SAndroid Build Coastguard Worker srshr v18.4h, v31.4h, #2 2286*c0909341SAndroid Build Coastguard Worker.else 2287*c0909341SAndroid Build Coastguard Worker srshr v16.4h, v31.4h, #2 2288*c0909341SAndroid Build Coastguard Worker 2289*c0909341SAndroid Build 
Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2290*c0909341SAndroid Build Coastguard Worker mov v17.8b, v28.8b 2291*c0909341SAndroid Build Coastguard Worker mov v18.8b, v29.8b 2292*c0909341SAndroid Build Coastguard Worker.endif 2293*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2294*c0909341SAndroid Build Coastguard Worker mov v19.8b, v28.8b 2295*c0909341SAndroid Build Coastguard Worker mov v20.8b, v29.8b 2296*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2297*c0909341SAndroid Build Coastguard Worker mov v21.8b, v28.8b 2298*c0909341SAndroid Build Coastguard Worker mov v22.8b, v29.8b 2299*c0909341SAndroid Build Coastguard Worker 2300*c0909341SAndroid Build Coastguard Worker48: 2301*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_4) 2302*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2303*c0909341SAndroid Build Coastguard Worker smull v2.4s, v18.4h, v1.h[1] 2304*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[2] 2305*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v20.4h, v1.h[3] 2306*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[4] 2307*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v22.4h, v1.h[5] 2308*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v28.4h, v1.h[6] 2309*c0909341SAndroid Build Coastguard Worker smull v3.4s, v19.4h, v1.h[1] 2310*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v20.4h, v1.h[2] 2311*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v21.4h, v1.h[3] 2312*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v22.4h, v1.h[4] 2313*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v28.4h, v1.h[5] 2314*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v29.4h, v1.h[6] 2315*c0909341SAndroid Build Coastguard Worker.else // 8tap 2316*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2317*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 
2318*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2319*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[3] 2320*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v20.4h, v1.h[4] 2321*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[5] 2322*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v22.4h, v1.h[6] 2323*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v28.4h, v1.h[7] 2324*c0909341SAndroid Build Coastguard Worker smull v3.4s, v17.4h, v1.h[0] 2325*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v18.4h, v1.h[1] 2326*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v19.4h, v1.h[2] 2327*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v20.4h, v1.h[3] 2328*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v21.4h, v1.h[4] 2329*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v22.4h, v1.h[5] 2330*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v28.4h, v1.h[6] 2331*c0909341SAndroid Build Coastguard Worker smlal v3.4s, v29.4h, v1.h[7] 2332*c0909341SAndroid Build Coastguard Worker.endif 2333*c0909341SAndroid Build Coastguard Worker sqrshrn v2.4h, v2.4s, #\shift_hv 2334*c0909341SAndroid Build Coastguard Worker sqrshrn v3.4h, v3.4s, #\shift_hv 2335*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2336*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2337*c0909341SAndroid Build Coastguard Worker sqxtun v2.8b, v2.8h 2338*c0909341SAndroid Build Coastguard Worker sqxtun v3.8b, v3.8h 2339*c0909341SAndroid Build Coastguard Worker str s2, [\dst] 2340*c0909341SAndroid Build Coastguard Worker str s3, [\ds2] 2341*c0909341SAndroid Build Coastguard Worker add \dst, \dst, \d_strd 2342*c0909341SAndroid Build Coastguard Worker add \ds2, \ds2, \d_strd 2343*c0909341SAndroid Build Coastguard Worker.else 2344*c0909341SAndroid Build Coastguard Worker st1 {v2.4h}, [\dst], \d_strd 2345*c0909341SAndroid Build Coastguard Worker st1 {v3.4h}, [\ds2], \d_strd 2346*c0909341SAndroid 
Build Coastguard Worker.endif 2347*c0909341SAndroid Build Coastguard Worker b.le 0f 2348*c0909341SAndroid Build Coastguard Worker.ifc \taps, 8tap 2349*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2350*c0909341SAndroid Build Coastguard Worker mov v17.8b, v19.8b 2351*c0909341SAndroid Build Coastguard Worker.endif 2352*c0909341SAndroid Build Coastguard Worker mov v18.8b, v20.8b 2353*c0909341SAndroid Build Coastguard Worker mov v19.8b, v21.8b 2354*c0909341SAndroid Build Coastguard Worker mov v20.8b, v22.8b 2355*c0909341SAndroid Build Coastguard Worker mov v21.8b, v28.8b 2356*c0909341SAndroid Build Coastguard Worker mov v22.8b, v29.8b 2357*c0909341SAndroid Build Coastguard Worker b 48b 2358*c0909341SAndroid Build Coastguard Worker0: 2359*c0909341SAndroid Build Coastguard Worker ret x15 2360*c0909341SAndroid Build Coastguard Worker 2361*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_4): 2362*c0909341SAndroid Build Coastguard Worker ld1 {v26.8b}, [\sr2], \s_strd 2363*c0909341SAndroid Build Coastguard Worker ld1 {v27.8b}, [\src], \s_strd 2364*c0909341SAndroid Build Coastguard Worker uxtl v26.8h, v26.8b 2365*c0909341SAndroid Build Coastguard Worker uxtl v27.8h, v27.8b 2366*c0909341SAndroid Build Coastguard Worker ext v28.16b, v26.16b, v26.16b, #2 2367*c0909341SAndroid Build Coastguard Worker ext v29.16b, v26.16b, v26.16b, #4 2368*c0909341SAndroid Build Coastguard Worker ext v30.16b, v26.16b, v26.16b, #6 2369*c0909341SAndroid Build Coastguard Worker mul v31.4h, v26.4h, v0.h[0] 2370*c0909341SAndroid Build Coastguard Worker mla v31.4h, v28.4h, v0.h[1] 2371*c0909341SAndroid Build Coastguard Worker mla v31.4h, v29.4h, v0.h[2] 2372*c0909341SAndroid Build Coastguard Worker mla v31.4h, v30.4h, v0.h[3] 2373*c0909341SAndroid Build Coastguard Worker ext v28.16b, v27.16b, v27.16b, #2 2374*c0909341SAndroid Build Coastguard Worker ext v29.16b, v27.16b, v27.16b, #4 2375*c0909341SAndroid Build Coastguard Worker ext v30.16b, v27.16b, v27.16b, #6 
2376*c0909341SAndroid Build Coastguard Worker mul v27.4h, v27.4h, v0.h[0] 2377*c0909341SAndroid Build Coastguard Worker mla v27.4h, v28.4h, v0.h[1] 2378*c0909341SAndroid Build Coastguard Worker mla v27.4h, v29.4h, v0.h[2] 2379*c0909341SAndroid Build Coastguard Worker mla v27.4h, v30.4h, v0.h[3] 2380*c0909341SAndroid Build Coastguard Worker srshr v28.4h, v31.4h, #2 2381*c0909341SAndroid Build Coastguard Worker srshr v29.4h, v27.4h, #2 2382*c0909341SAndroid Build Coastguard Worker ret 2383*c0909341SAndroid Build Coastguard Worker 2384*c0909341SAndroid Build Coastguard Worker80: 2385*c0909341SAndroid Build Coastguard Worker160: 2386*c0909341SAndroid Build Coastguard Worker320: 2387*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2388*c0909341SAndroid Build Coastguard Worker b.gt 880f 2389*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmx] 2390*c0909341SAndroid Build Coastguard Worker ldur s1, [\xmy, #2] 2391*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2392*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 2393*c0909341SAndroid Build Coastguard Worker.else 2394*c0909341SAndroid Build Coastguard Worker sub \src, \src, #3 2395*c0909341SAndroid Build Coastguard Worker.endif 2396*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 2397*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2398*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2399*c0909341SAndroid Build Coastguard Worker mov x15, x30 2400*c0909341SAndroid Build Coastguard Worker mov \my, \h 2401*c0909341SAndroid Build Coastguard Worker 2402*c0909341SAndroid Build Coastguard Worker164: // 8x2, 8x4, 16x2, 16x4, 32x2, 32x4 hv 2403*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2404*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2405*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2406*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2407*c0909341SAndroid Build 
Coastguard Worker 2408*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8_first) 2409*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2410*c0909341SAndroid Build Coastguard Worker mov v17.16b, v24.16b 2411*c0909341SAndroid Build Coastguard Worker mov v18.16b, v25.16b 2412*c0909341SAndroid Build Coastguard Worker 2413*c0909341SAndroid Build Coastguard Worker8: 2414*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2415*c0909341SAndroid Build Coastguard Worker smull2 v3.4s, v16.8h, v1.h[0] 2416*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2417*c0909341SAndroid Build Coastguard Worker smull v4.4s, v17.4h, v1.h[0] 2418*c0909341SAndroid Build Coastguard Worker smull2 v5.4s, v17.8h, v1.h[0] 2419*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2420*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v17.8h, v1.h[1] 2421*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v18.4h, v1.h[1] 2422*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v18.8h, v1.h[1] 2423*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2424*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v18.8h, v1.h[2] 2425*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v24.4h, v1.h[2] 2426*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v24.8h, v1.h[2] 2427*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v24.4h, v1.h[3] 2428*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v24.8h, v1.h[3] 2429*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v25.4h, v1.h[3] 2430*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v25.8h, v1.h[3] 2431*c0909341SAndroid Build Coastguard Worker sqrshrn v2.4h, v2.4s, #\shift_hv 2432*c0909341SAndroid Build Coastguard Worker sqrshrn2 v2.8h, v3.4s, #\shift_hv 2433*c0909341SAndroid Build Coastguard Worker sqrshrn v4.4h, v4.4s, #\shift_hv 2434*c0909341SAndroid Build Coastguard Worker sqrshrn2 v4.8h, 
v5.4s, #\shift_hv 2435*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2436*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2437*c0909341SAndroid Build Coastguard Worker sqxtun v2.8b, v2.8h 2438*c0909341SAndroid Build Coastguard Worker sqxtun v4.8b, v4.8h 2439*c0909341SAndroid Build Coastguard Worker st1 {v2.8b}, [\dst], \d_strd 2440*c0909341SAndroid Build Coastguard Worker st1 {v4.8b}, [\ds2], \d_strd 2441*c0909341SAndroid Build Coastguard Worker.else 2442*c0909341SAndroid Build Coastguard Worker st1 {v2.8h}, [\dst], \d_strd 2443*c0909341SAndroid Build Coastguard Worker st1 {v4.8h}, [\ds2], \d_strd 2444*c0909341SAndroid Build Coastguard Worker.endif 2445*c0909341SAndroid Build Coastguard Worker b.le 9f 2446*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 2447*c0909341SAndroid Build Coastguard Worker mov v17.16b, v24.16b 2448*c0909341SAndroid Build Coastguard Worker mov v18.16b, v25.16b 2449*c0909341SAndroid Build Coastguard Worker b 8b 2450*c0909341SAndroid Build Coastguard Worker9: 2451*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 2452*c0909341SAndroid Build Coastguard Worker b.le 0f 2453*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2454*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2455*c0909341SAndroid Build Coastguard Worker msub \src, \s_strd, \xmy, \src 2456*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 2457*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #2 2458*c0909341SAndroid Build Coastguard Worker mov \h, \my 2459*c0909341SAndroid Build Coastguard Worker add \src, \src, #8 2460*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2461*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #8 2462*c0909341SAndroid Build Coastguard Worker.else 2463*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2464*c0909341SAndroid Build Coastguard Worker.endif 2465*c0909341SAndroid Build Coastguard Worker b 164b 
2466*c0909341SAndroid Build Coastguard Worker 2467*c0909341SAndroid Build Coastguard Worker880: // 8x8, 8x16, ..., 16x8, ..., 32x8, ... hv 2468*c0909341SAndroid Build Coastguard Worker640: 2469*c0909341SAndroid Build Coastguard Worker1280: 2470*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2471*c0909341SAndroid Build Coastguard Worker ld1 {v0.8b}, [\xmx] 2472*c0909341SAndroid Build Coastguard Worker ld1 {v1.8b}, [\xmy] 2473*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2474*c0909341SAndroid Build Coastguard Worker sub \src, \src, #2 2475*c0909341SAndroid Build Coastguard Worker.else 2476*c0909341SAndroid Build Coastguard Worker sub \src, \src, #3 2477*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd 2478*c0909341SAndroid Build Coastguard Worker.endif 2479*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 2480*c0909341SAndroid Build Coastguard Worker sxtl v0.8h, v0.8b 2481*c0909341SAndroid Build Coastguard Worker sxtl v1.8h, v1.8b 2482*c0909341SAndroid Build Coastguard Worker mov x15, x30 2483*c0909341SAndroid Build Coastguard Worker mov \my, \h 2484*c0909341SAndroid Build Coastguard Worker 2485*c0909341SAndroid Build Coastguard Worker168: 2486*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2487*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2488*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2489*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2490*c0909341SAndroid Build Coastguard Worker 2491*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8_first) 2492*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2493*c0909341SAndroid Build Coastguard Worker mov v18.16b, v16.16b 2494*c0909341SAndroid Build Coastguard Worker.else 2495*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2496*c0909341SAndroid Build Coastguard Worker mov v17.16b, v24.16b 2497*c0909341SAndroid Build 
Coastguard Worker mov v18.16b, v25.16b 2498*c0909341SAndroid Build Coastguard Worker.endif 2499*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2500*c0909341SAndroid Build Coastguard Worker mov v19.16b, v24.16b 2501*c0909341SAndroid Build Coastguard Worker mov v20.16b, v25.16b 2502*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2503*c0909341SAndroid Build Coastguard Worker mov v21.16b, v24.16b 2504*c0909341SAndroid Build Coastguard Worker mov v22.16b, v25.16b 2505*c0909341SAndroid Build Coastguard Worker 2506*c0909341SAndroid Build Coastguard Worker88: 2507*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2508*c0909341SAndroid Build Coastguard Worker smull v2.4s, v18.4h, v1.h[1] 2509*c0909341SAndroid Build Coastguard Worker smull2 v3.4s, v18.8h, v1.h[1] 2510*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2511*c0909341SAndroid Build Coastguard Worker smull v4.4s, v19.4h, v1.h[1] 2512*c0909341SAndroid Build Coastguard Worker smull2 v5.4s, v19.8h, v1.h[1] 2513*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[2] 2514*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v19.8h, v1.h[2] 2515*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v20.4h, v1.h[2] 2516*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v20.8h, v1.h[2] 2517*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v20.4h, v1.h[3] 2518*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v20.8h, v1.h[3] 2519*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v21.4h, v1.h[3] 2520*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v21.8h, v1.h[3] 2521*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[4] 2522*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v21.8h, v1.h[4] 2523*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v22.4h, v1.h[4] 2524*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v22.8h, v1.h[4] 2525*c0909341SAndroid Build Coastguard 
Worker smlal v2.4s, v22.4h, v1.h[5] 2526*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v22.8h, v1.h[5] 2527*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v24.4h, v1.h[5] 2528*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v24.8h, v1.h[5] 2529*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v24.4h, v1.h[6] 2530*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v24.8h, v1.h[6] 2531*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v25.4h, v1.h[6] 2532*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v25.8h, v1.h[6] 2533*c0909341SAndroid Build Coastguard Worker.else // 8tap 2534*c0909341SAndroid Build Coastguard Worker smull v2.4s, v16.4h, v1.h[0] 2535*c0909341SAndroid Build Coastguard Worker smull2 v3.4s, v16.8h, v1.h[0] 2536*c0909341SAndroid Build Coastguard Worker bl L(\type\()_\taps\()_filter_8) 2537*c0909341SAndroid Build Coastguard Worker smull v4.4s, v17.4h, v1.h[0] 2538*c0909341SAndroid Build Coastguard Worker smull2 v5.4s, v17.8h, v1.h[0] 2539*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v17.4h, v1.h[1] 2540*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v17.8h, v1.h[1] 2541*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v18.4h, v1.h[1] 2542*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v18.8h, v1.h[1] 2543*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v18.4h, v1.h[2] 2544*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v18.8h, v1.h[2] 2545*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v19.4h, v1.h[2] 2546*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v19.8h, v1.h[2] 2547*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v19.4h, v1.h[3] 2548*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v19.8h, v1.h[3] 2549*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v20.4h, v1.h[3] 2550*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v20.8h, v1.h[3] 2551*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v20.4h, v1.h[4] 
2552*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v20.8h, v1.h[4] 2553*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v21.4h, v1.h[4] 2554*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v21.8h, v1.h[4] 2555*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v21.4h, v1.h[5] 2556*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v21.8h, v1.h[5] 2557*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v22.4h, v1.h[5] 2558*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v22.8h, v1.h[5] 2559*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v22.4h, v1.h[6] 2560*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v22.8h, v1.h[6] 2561*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v24.4h, v1.h[6] 2562*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v24.8h, v1.h[6] 2563*c0909341SAndroid Build Coastguard Worker smlal v2.4s, v24.4h, v1.h[7] 2564*c0909341SAndroid Build Coastguard Worker smlal2 v3.4s, v24.8h, v1.h[7] 2565*c0909341SAndroid Build Coastguard Worker smlal v4.4s, v25.4h, v1.h[7] 2566*c0909341SAndroid Build Coastguard Worker smlal2 v5.4s, v25.8h, v1.h[7] 2567*c0909341SAndroid Build Coastguard Worker.endif 2568*c0909341SAndroid Build Coastguard Worker sqrshrn v2.4h, v2.4s, #\shift_hv 2569*c0909341SAndroid Build Coastguard Worker sqrshrn2 v2.8h, v3.4s, #\shift_hv 2570*c0909341SAndroid Build Coastguard Worker sqrshrn v4.4h, v4.4s, #\shift_hv 2571*c0909341SAndroid Build Coastguard Worker sqrshrn2 v4.8h, v5.4s, #\shift_hv 2572*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2573*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2574*c0909341SAndroid Build Coastguard Worker sqxtun v2.8b, v2.8h 2575*c0909341SAndroid Build Coastguard Worker sqxtun v4.8b, v4.8h 2576*c0909341SAndroid Build Coastguard Worker st1 {v2.8b}, [\dst], \d_strd 2577*c0909341SAndroid Build Coastguard Worker st1 {v4.8b}, [\ds2], \d_strd 2578*c0909341SAndroid Build Coastguard Worker.else 2579*c0909341SAndroid Build Coastguard 
Worker st1 {v2.8h}, [\dst], \d_strd 2580*c0909341SAndroid Build Coastguard Worker st1 {v4.8h}, [\ds2], \d_strd 2581*c0909341SAndroid Build Coastguard Worker.endif 2582*c0909341SAndroid Build Coastguard Worker b.le 9f 2583*c0909341SAndroid Build Coastguard Worker.ifc \taps, 8tap 2584*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 2585*c0909341SAndroid Build Coastguard Worker mov v17.16b, v19.16b 2586*c0909341SAndroid Build Coastguard Worker.endif 2587*c0909341SAndroid Build Coastguard Worker mov v18.16b, v20.16b 2588*c0909341SAndroid Build Coastguard Worker mov v19.16b, v21.16b 2589*c0909341SAndroid Build Coastguard Worker mov v20.16b, v22.16b 2590*c0909341SAndroid Build Coastguard Worker mov v21.16b, v24.16b 2591*c0909341SAndroid Build Coastguard Worker mov v22.16b, v25.16b 2592*c0909341SAndroid Build Coastguard Worker b 88b 2593*c0909341SAndroid Build Coastguard Worker9: 2594*c0909341SAndroid Build Coastguard Worker subs \w, \w, #8 2595*c0909341SAndroid Build Coastguard Worker b.le 0f 2596*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 2597*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 2598*c0909341SAndroid Build Coastguard Worker msub \src, \s_strd, \xmy, \src 2599*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 2600*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #3 2601*c0909341SAndroid Build Coastguard Worker mov \h, \my 2602*c0909341SAndroid Build Coastguard Worker add \src, \src, #8 2603*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2604*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #8 2605*c0909341SAndroid Build Coastguard Worker.else 2606*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 2607*c0909341SAndroid Build Coastguard Worker.endif 2608*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2609*c0909341SAndroid Build Coastguard Worker add \src, \src, \s_strd, lsl #1 2610*c0909341SAndroid Build Coastguard Worker.endif 
2611*c0909341SAndroid Build Coastguard Worker b 168b 2612*c0909341SAndroid Build Coastguard Worker0: 2613*c0909341SAndroid Build Coastguard Worker ret x15 2614*c0909341SAndroid Build Coastguard Worker 2615*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_8_first): 2616*c0909341SAndroid Build Coastguard Worker ld1 {v28.8b, v29.8b}, [\src], \s_strd 2617*c0909341SAndroid Build Coastguard Worker uxtl v28.8h, v28.8b 2618*c0909341SAndroid Build Coastguard Worker uxtl v29.8h, v29.8b 2619*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2620*c0909341SAndroid Build Coastguard Worker mul v16.8h, v28.8h, v0.h[1] 2621*c0909341SAndroid Build Coastguard Worker ext v25.16b, v28.16b, v29.16b, #(2*1) 2622*c0909341SAndroid Build Coastguard Worker ext v26.16b, v28.16b, v29.16b, #(2*2) 2623*c0909341SAndroid Build Coastguard Worker ext v27.16b, v28.16b, v29.16b, #(2*3) 2624*c0909341SAndroid Build Coastguard Worker mla v16.8h, v25.8h, v0.h[2] 2625*c0909341SAndroid Build Coastguard Worker mla v16.8h, v26.8h, v0.h[3] 2626*c0909341SAndroid Build Coastguard Worker mla v16.8h, v27.8h, v0.h[4] 2627*c0909341SAndroid Build Coastguard Worker ext v24.16b, v28.16b, v29.16b, #(2*4) 2628*c0909341SAndroid Build Coastguard Worker ext v25.16b, v28.16b, v29.16b, #(2*5) 2629*c0909341SAndroid Build Coastguard Worker mla v16.8h, v24.8h, v0.h[5] 2630*c0909341SAndroid Build Coastguard Worker mla v16.8h, v25.8h, v0.h[6] 2631*c0909341SAndroid Build Coastguard Worker.else // 8tap 2632*c0909341SAndroid Build Coastguard Worker mul v16.8h, v28.8h, v0.h[0] 2633*c0909341SAndroid Build Coastguard Worker ext v24.16b, v28.16b, v29.16b, #(2*1) 2634*c0909341SAndroid Build Coastguard Worker ext v25.16b, v28.16b, v29.16b, #(2*2) 2635*c0909341SAndroid Build Coastguard Worker ext v26.16b, v28.16b, v29.16b, #(2*3) 2636*c0909341SAndroid Build Coastguard Worker ext v27.16b, v28.16b, v29.16b, #(2*4) 2637*c0909341SAndroid Build Coastguard Worker mla v16.8h, v24.8h, v0.h[1] 2638*c0909341SAndroid Build 
Coastguard Worker mla v16.8h, v25.8h, v0.h[2] 2639*c0909341SAndroid Build Coastguard Worker mla v16.8h, v26.8h, v0.h[3] 2640*c0909341SAndroid Build Coastguard Worker mla v16.8h, v27.8h, v0.h[4] 2641*c0909341SAndroid Build Coastguard Worker ext v24.16b, v28.16b, v29.16b, #(2*5) 2642*c0909341SAndroid Build Coastguard Worker ext v25.16b, v28.16b, v29.16b, #(2*6) 2643*c0909341SAndroid Build Coastguard Worker ext v26.16b, v28.16b, v29.16b, #(2*7) 2644*c0909341SAndroid Build Coastguard Worker mla v16.8h, v24.8h, v0.h[5] 2645*c0909341SAndroid Build Coastguard Worker mla v16.8h, v25.8h, v0.h[6] 2646*c0909341SAndroid Build Coastguard Worker mla v16.8h, v26.8h, v0.h[7] 2647*c0909341SAndroid Build Coastguard Worker.endif 2648*c0909341SAndroid Build Coastguard Worker srshr v16.8h, v16.8h, #2 2649*c0909341SAndroid Build Coastguard Worker ret 2650*c0909341SAndroid Build Coastguard Worker 2651*c0909341SAndroid Build Coastguard WorkerL(\type\()_\taps\()_filter_8): 2652*c0909341SAndroid Build Coastguard Worker ld1 {v28.8b, v29.8b}, [\sr2], \s_strd 2653*c0909341SAndroid Build Coastguard Worker ld1 {v30.8b, v31.8b}, [\src], \s_strd 2654*c0909341SAndroid Build Coastguard Worker uxtl v28.8h, v28.8b 2655*c0909341SAndroid Build Coastguard Worker uxtl v29.8h, v29.8b 2656*c0909341SAndroid Build Coastguard Worker uxtl v30.8h, v30.8b 2657*c0909341SAndroid Build Coastguard Worker uxtl v31.8h, v31.8b 2658*c0909341SAndroid Build Coastguard Worker.ifc \taps, 6tap 2659*c0909341SAndroid Build Coastguard Worker mul v24.8h, v28.8h, v0.h[1] 2660*c0909341SAndroid Build Coastguard Worker mul v25.8h, v30.8h, v0.h[1] 2661*c0909341SAndroid Build Coastguard Worker .irpc i, 23456 2662*c0909341SAndroid Build Coastguard Worker ext v26.16b, v28.16b, v29.16b, #(2*\i-2) 2663*c0909341SAndroid Build Coastguard Worker ext v27.16b, v30.16b, v31.16b, #(2*\i-2) 2664*c0909341SAndroid Build Coastguard Worker mla v24.8h, v26.8h, v0.h[\i] 2665*c0909341SAndroid Build Coastguard Worker mla v25.8h, v27.8h, v0.h[\i] 
2666*c0909341SAndroid Build Coastguard Worker .endr 2667*c0909341SAndroid Build Coastguard Worker.else // 8tap 2668*c0909341SAndroid Build Coastguard Worker mul v24.8h, v28.8h, v0.h[0] 2669*c0909341SAndroid Build Coastguard Worker mul v25.8h, v30.8h, v0.h[0] 2670*c0909341SAndroid Build Coastguard Worker .irpc i, 1234567 2671*c0909341SAndroid Build Coastguard Worker ext v26.16b, v28.16b, v29.16b, #(2*\i) 2672*c0909341SAndroid Build Coastguard Worker ext v27.16b, v30.16b, v31.16b, #(2*\i) 2673*c0909341SAndroid Build Coastguard Worker mla v24.8h, v26.8h, v0.h[\i] 2674*c0909341SAndroid Build Coastguard Worker mla v25.8h, v27.8h, v0.h[\i] 2675*c0909341SAndroid Build Coastguard Worker .endr 2676*c0909341SAndroid Build Coastguard Worker.endif 2677*c0909341SAndroid Build Coastguard Worker srshr v24.8h, v24.8h, #2 2678*c0909341SAndroid Build Coastguard Worker srshr v25.8h, v25.8h, #2 2679*c0909341SAndroid Build Coastguard Worker ret 2680*c0909341SAndroid Build Coastguard Workerendfunc 2681*c0909341SAndroid Build Coastguard Worker 2682*c0909341SAndroid Build Coastguard Workerjumptable \type\()_\taps\()_hv_tbl 2683*c0909341SAndroid Build Coastguard Worker .word 1280b - \type\()_\taps\()_hv_tbl 2684*c0909341SAndroid Build Coastguard Worker .word 640b - \type\()_\taps\()_hv_tbl 2685*c0909341SAndroid Build Coastguard Worker .word 320b - \type\()_\taps\()_hv_tbl 2686*c0909341SAndroid Build Coastguard Worker .word 160b - \type\()_\taps\()_hv_tbl 2687*c0909341SAndroid Build Coastguard Worker .word 80b - \type\()_\taps\()_hv_tbl 2688*c0909341SAndroid Build Coastguard Worker .word 40b - \type\()_\taps\()_hv_tbl 2689*c0909341SAndroid Build Coastguard Worker .word 20b - \type\()_\taps\()_hv_tbl 2690*c0909341SAndroid Build Coastguard Workerendjumptable 2691*c0909341SAndroid Build Coastguard Worker.endm 2692*c0909341SAndroid Build Coastguard Worker 2693*c0909341SAndroid Build Coastguard Worker 2694*c0909341SAndroid Build Coastguard Worker.macro filter_bilin_fn type, dst, d_strd, src, 
s_strd, w, h, mx, xmx, my, xmy, ds2, sr2, shift_hv
// \type\()_bilin_8bpc_neon: bilinear put/prep entry point plus the
// horizontal-only filter paths.
//
// Macro arguments used here (all are register names supplied by the caller
// of the macro):
//   \dst, \d_strd   destination pointer / stride
//   \src, \s_strd   source pointer / stride
//   \w, \h          block width / height (rows are processed two at a time)
//   \mx, \my        horizontal / vertical filter fraction; zero means
//                   "no filtering in that direction"
//   \ds2, \sr2      scratch pointers for the second row of each row pair
//
// Vector constants set up on entry:
//   v0 = 16 - mx    v1 = mx    (horizontal weights, one per byte lane)
//   v2 = 16 - my    v3 = my    (vertical weights, used by the v/hv paths)
//
// Scratch: w8/x8, w9/x9, v4-v7, v16-v23. \mx is reused as a column counter
// in the 16+ wide path once its value has been copied into v0/v1.
//
// "put" narrows each sum with a rounding, saturating shift right by 4 and
// stores bytes; "prep" stores the unshifted 16-bit sums.
function \type\()_bilin_8bpc_neon, export=1
        dup             v1.16b,  \mx
        dup             v3.16b,  \my
        mov             w9,  #16
        sub             w8,  w9,  \mx             // w8 = 16 - mx
        sub             w9,  w9,  \my             // w9 = 16 - my
        dup             v0.16b,  w8
        dup             v2.16b,  w9
.ifc \type, prep
        // prep: the destination stride is derived from the width, d_strd = 2*w
        // (presumably bytes per row of 16-bit output - TODO confirm with caller).
        uxtw            \d_strd,  \w
        lsl             \d_strd,  \d_strd,  #1
.endif

        // Jump table index: clz(w) - 24. Assumes w is a power of two in
        // 2..128, giving 0 for w=128 up to 6 for w=2, which matches the
        // 1280b..20b ordering of the jump tables.
        clz             w8,  \w
        sub             w8,  w8,  #24
        cbnz            \mx, L(\type\()_bilin_h)
        cbnz            \my, L(\type\()_bilin_v)
        // mx == 0 and my == 0: tail-branch to \type\()_neon (defined outside
        // this block; presumably the unfiltered put/prep path).
        b               \type\()_neon

L(\type\()_bilin_h):
        // Both fractions non-zero: use the combined hv path instead.
        cbnz            \my, L(\type\()_bilin_hv)

        // Table entries are signed 32-bit offsets relative to the table base.
        movrel          x9,  \type\()_bilin_h_tbl
        ldrsw           x8,  [x9, x8, lsl #2]
        add             x9,  x9,  x8
        br              x9

20:     // 2xN h
        AARCH64_VALID_JUMP_TARGET
        // The 2-wide body is only assembled for put; for prep this label has
        // no body of its own and falls through to 40:.
.ifc \type, put
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1    // step two rows per iteration
        lsl             \s_strd,  \s_strd,  #1
2:
        ld1r            {v4.4s},  [\src], \s_strd // 4 source bytes of row 0, replicated
        ld1r            {v6.4s},  [\sr2], \s_strd // 4 source bytes of row 1, replicated
        ext             v5.8b,   v4.8b,   v4.8b,   #1 // row 0 shifted by one pixel
        ext             v7.8b,   v6.8b,   v6.8b,   #1 // row 1 shifted by one pixel
        trn1            v4.4h,   v4.4h,   v6.4h   // h[0] = row 0 px 0-1, h[1] = row 1 px 0-1
        trn1            v5.4h,   v5.4h,   v7.4h   // same layout for the +1 neighbours
        subs            \h,  \h,  #2              // flags consumed by b.gt below
        umull           v4.8h,   v4.8b,   v0.8b   // px[x]   * (16 - mx)
        umlal           v4.8h,   v5.8b,   v1.8b   // + px[x+1] * mx
        uqrshrn         v4.8b,   v4.8h,   #4
        st1             {v4.h}[0], [\dst], \d_strd
        st1             {v4.h}[1], [\ds2], \d_strd
        b.gt            2b
        ret
.endif

40:     // 4xN h
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1    // step two rows per iteration
        lsl             \s_strd,  \s_strd,  #1
4:
        ld1             {v4.8b},  [\src], \s_strd
        ld1             {v6.8b},  [\sr2], \s_strd
        ext             v5.8b,   v4.8b,   v4.8b,   #1 // row 0 shifted by one pixel
        ext             v7.8b,   v6.8b,   v6.8b,   #1 // row 1 shifted by one pixel
        trn1            v4.2s,   v4.2s,   v6.2s   // s[0] = row 0 px 0-3, s[1] = row 1 px 0-3
        trn1            v5.2s,   v5.2s,   v7.2s
        subs            \h,  \h,  #2              // flags consumed by b.gt below
        umull           v4.8h,   v4.8b,   v0.8b   // px[x]   * (16 - mx)
        umlal           v4.8h,   v5.8b,   v1.8b   // + px[x+1] * mx
.ifc \type, put
        uqrshrn         v4.8b,   v4.8h,   #4
        st1             {v4.s}[0], [\dst], \d_strd
        st1             {v4.s}[1], [\ds2], \d_strd
.else
        // prep: low 64 bits hold the four 16-bit sums of row 0, high 64 bits
        // those of row 1.
        st1             {v4.8b},   [\dst], \d_strd
        st1             {v4.d}[1], [\ds2], \d_strd
.endif
        b.gt            4b
        ret

80:     // 8xN h
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \d_strd,  \d_strd,  #1    // step two rows per iteration
        lsl             \s_strd,  \s_strd,  #1
8:
        // 16 bytes are loaded per row; bytes 0-8 are the ones consumed below.
        ld1             {v4.16b},  [\src], \s_strd
        ld1             {v6.16b},  [\sr2], \s_strd
        ext             v5.16b,  v4.16b,  v4.16b,  #1 // row 0 shifted by one pixel
        ext             v7.16b,  v6.16b,  v6.16b,  #1 // row 1 shifted by one pixel
        subs            \h,  \h,  #2              // flags consumed by b.gt below
        umull           v4.8h,   v4.8b,   v0.8b
        umull           v6.8h,   v6.8b,   v0.8b
        umlal           v4.8h,   v5.8b,   v1.8b
        umlal           v6.8h,   v7.8b,   v1.8b
.ifc \type, put
        uqrshrn         v4.8b,   v4.8h,   #4
        uqrshrn         v6.8b,   v6.8h,   #4
        st1             {v4.8b},   [\dst], \d_strd
        st1             {v6.8b},   [\ds2], \d_strd
.else
        st1             {v4.8h},   [\dst], \d_strd
        st1             {v6.8h},   [\ds2], \d_strd
.endif
        b.gt            8b
        ret
160:
320:
640:
1280:   // 16xN, 32xN, ... h
        AARCH64_VALID_JUMP_TARGET
        add             \ds2,  \dst,  \d_strd
        add             \sr2,  \src,  \s_strd
        lsl             \s_strd,  \s_strd,  #1

        // Pointers are post-incremented across the row, so the strides become
        // "what is left to reach the start of the row two below":
        //   source: 2*stride - (w + 8), since each row reads 8 + w bytes
        //   put:    2*stride - w
        //   prep:   d_strd stays 2*w; after 2*w bytes of stores, adding it
        //           skips the row written through the other pointer.
        sub             \s_strd,  \s_strd,  \w,  uxtw
        sub             \s_strd,  \s_strd,  #8
.ifc \type, put
        lsl             \d_strd,  \d_strd,  #1
        sub             \d_strd,  \d_strd,  \w,  uxtw
.endif
161:
        // Prime the upper halves of v16/v20 with the first 8 pixels of each row.
        ld1             {v16.d}[1],  [\src], #8
        ld1             {v20.d}[1],  [\sr2], #8
        mov             \mx, \w                   // \mx now counts remaining columns

16:
        ld1             {v18.16b},  [\src], #16
        ld1             {v22.16b},  [\sr2], #16
        ext             v17.16b, v16.16b, v18.16b, #8 // row 0: px[x .. x+15]
        ext             v19.16b, v16.16b, v18.16b, #9 // row 0: px[x+1 .. x+16]
        ext             v21.16b, v20.16b, v22.16b, #8 // row 1: px[x .. x+15]
        ext             v23.16b, v20.16b, v22.16b, #9 // row 1: px[x+1 .. x+16]
        umull           v16.8h,  v17.8b,  v0.8b
        umull2          v17.8h,  v17.16b, v0.16b
        umull           v20.8h,  v21.8b,  v0.8b
        umull2          v21.8h,  v21.16b, v0.16b
        umlal           v16.8h,  v19.8b,  v1.8b
        umlal2          v17.8h,  v19.16b, v1.16b
        umlal           v20.8h,  v23.8b,  v1.8b
        umlal2          v21.8h,  v23.16b, v1.16b
        subs            \mx, \mx, #16             // flags consumed by b.le below
.ifc \type, put
        uqrshrn         v16.8b,  v16.8h,  #4
        uqrshrn2        v16.16b, v17.8h,  #4
        uqrshrn         v20.8b,  v20.8h,  #4
        uqrshrn2        v20.16b, v21.8h,  #4
        st1             {v16.16b}, [\dst], #16
        st1             {v20.16b}, [\ds2], #16
.else
        st1             {v16.8h, v17.8h}, [\dst], #32
        st1             {v20.8h, v21.8h}, [\ds2], #32
.endif
        b.le            9f

        // Carry the freshly loaded 16 bytes over as the "previous" vector.
        mov             v16.16b, v18.16b
        mov             v20.16b, v22.16b
        b               16b

9:
        // End of a row pair: advance all four pointers to the next pair.
        add             \dst,  \dst,  \d_strd
        add             \ds2,  \ds2,  \d_strd
        add             \src,  \src,  \s_strd
        add             \sr2,  \sr2,  \s_strd

        subs            \h,  \h,  #2
        b.gt            161b
        ret
endfunc
Coastguard Workerjumptable \type\()_bilin_h_tbl 2865*c0909341SAndroid Build Coastguard Worker .word 1280b - \type\()_bilin_h_tbl 2866*c0909341SAndroid Build Coastguard Worker .word 640b - \type\()_bilin_h_tbl 2867*c0909341SAndroid Build Coastguard Worker .word 320b - \type\()_bilin_h_tbl 2868*c0909341SAndroid Build Coastguard Worker .word 160b - \type\()_bilin_h_tbl 2869*c0909341SAndroid Build Coastguard Worker .word 80b - \type\()_bilin_h_tbl 2870*c0909341SAndroid Build Coastguard Worker .word 40b - \type\()_bilin_h_tbl 2871*c0909341SAndroid Build Coastguard Worker .word 20b - \type\()_bilin_h_tbl 2872*c0909341SAndroid Build Coastguard Workerendjumptable 2873*c0909341SAndroid Build Coastguard Worker 2874*c0909341SAndroid Build Coastguard Worker 2875*c0909341SAndroid Build Coastguard Workerfunction L(\type\()_bilin_v) 2876*c0909341SAndroid Build Coastguard Worker cmp \h, #4 2877*c0909341SAndroid Build Coastguard Worker movrel x9, \type\()_bilin_v_tbl 2878*c0909341SAndroid Build Coastguard Worker ldrsw x8, [x9, x8, lsl #2] 2879*c0909341SAndroid Build Coastguard Worker add x9, x9, x8 2880*c0909341SAndroid Build Coastguard Worker br x9 2881*c0909341SAndroid Build Coastguard Worker 2882*c0909341SAndroid Build Coastguard Worker20: // 2xN v 2883*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2884*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2885*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2886*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2887*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2888*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2889*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2890*c0909341SAndroid Build Coastguard Worker 2891*c0909341SAndroid Build Coastguard Worker // 2x2 v 2892*c0909341SAndroid Build Coastguard Worker ld1r {v16.8h}, [\src], \s_strd 2893*c0909341SAndroid Build Coastguard Worker b.gt 24f 2894*c0909341SAndroid Build Coastguard 
Worker22: 2895*c0909341SAndroid Build Coastguard Worker ld1r {v17.8h}, [\sr2], \s_strd 2896*c0909341SAndroid Build Coastguard Worker ld1r {v18.8h}, [\src], \s_strd 2897*c0909341SAndroid Build Coastguard Worker trn1 v16.4h, v16.4h, v17.4h 2898*c0909341SAndroid Build Coastguard Worker trn1 v17.4h, v17.4h, v18.4h 2899*c0909341SAndroid Build Coastguard Worker umull v4.8h, v16.8b, v2.8b 2900*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v17.8b, v3.8b 2901*c0909341SAndroid Build Coastguard Worker uqrshrn v4.8b, v4.8h, #4 2902*c0909341SAndroid Build Coastguard Worker str h4, [\dst] 2903*c0909341SAndroid Build Coastguard Worker st1 {v4.h}[1], [\ds2] 2904*c0909341SAndroid Build Coastguard Worker ret 2905*c0909341SAndroid Build Coastguard Worker24: // 2x4, 2x6, 2x8, ... v 2906*c0909341SAndroid Build Coastguard Worker ld1r {v17.8h}, [\sr2], \s_strd 2907*c0909341SAndroid Build Coastguard Worker ld1r {v18.8h}, [\src], \s_strd 2908*c0909341SAndroid Build Coastguard Worker ld1r {v19.8h}, [\sr2], \s_strd 2909*c0909341SAndroid Build Coastguard Worker ld1r {v20.8h}, [\src], \s_strd 2910*c0909341SAndroid Build Coastguard Worker sub \h, \h, #4 2911*c0909341SAndroid Build Coastguard Worker trn1 v16.4h, v16.4h, v17.4h 2912*c0909341SAndroid Build Coastguard Worker trn1 v17.4h, v17.4h, v18.4h 2913*c0909341SAndroid Build Coastguard Worker trn1 v18.4h, v18.4h, v19.4h 2914*c0909341SAndroid Build Coastguard Worker trn1 v19.4h, v19.4h, v20.4h 2915*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v18.2s 2916*c0909341SAndroid Build Coastguard Worker trn1 v17.2s, v17.2s, v19.2s 2917*c0909341SAndroid Build Coastguard Worker umull v4.8h, v16.8b, v2.8b 2918*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v17.8b, v3.8b 2919*c0909341SAndroid Build Coastguard Worker cmp \h, #2 2920*c0909341SAndroid Build Coastguard Worker uqrshrn v4.8b, v4.8h, #4 2921*c0909341SAndroid Build Coastguard Worker st1 {v4.h}[0], [\dst], \d_strd 2922*c0909341SAndroid Build Coastguard Worker st1 
{v4.h}[1], [\ds2], \d_strd 2923*c0909341SAndroid Build Coastguard Worker st1 {v4.h}[2], [\dst], \d_strd 2924*c0909341SAndroid Build Coastguard Worker st1 {v4.h}[3], [\ds2], \d_strd 2925*c0909341SAndroid Build Coastguard Worker b.lt 0f 2926*c0909341SAndroid Build Coastguard Worker mov v16.8b, v20.8b 2927*c0909341SAndroid Build Coastguard Worker b.eq 22b 2928*c0909341SAndroid Build Coastguard Worker b 24b 2929*c0909341SAndroid Build Coastguard Worker0: 2930*c0909341SAndroid Build Coastguard Worker ret 2931*c0909341SAndroid Build Coastguard Worker.endif 2932*c0909341SAndroid Build Coastguard Worker 2933*c0909341SAndroid Build Coastguard Worker40: // 4xN v 2934*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2935*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2936*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2937*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2938*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2939*c0909341SAndroid Build Coastguard Worker ld1r {v16.4s}, [\src], \s_strd 2940*c0909341SAndroid Build Coastguard Worker4: 2941*c0909341SAndroid Build Coastguard Worker ld1r {v17.4s}, [\sr2], \s_strd 2942*c0909341SAndroid Build Coastguard Worker ld1r {v18.4s}, [\src], \s_strd 2943*c0909341SAndroid Build Coastguard Worker trn1 v16.2s, v16.2s, v17.2s 2944*c0909341SAndroid Build Coastguard Worker trn1 v17.2s, v17.2s, v18.2s 2945*c0909341SAndroid Build Coastguard Worker umull v4.8h, v16.8b, v2.8b 2946*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v17.8b, v3.8b 2947*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2948*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2949*c0909341SAndroid Build Coastguard Worker uqrshrn v4.8b, v4.8h, #4 2950*c0909341SAndroid Build Coastguard Worker st1 {v4.s}[0], [\dst], \d_strd 2951*c0909341SAndroid Build Coastguard Worker st1 {v4.s}[1], [\ds2], \d_strd 2952*c0909341SAndroid Build Coastguard Worker.else 
2953*c0909341SAndroid Build Coastguard Worker st1 {v4.8b}, [\dst], \d_strd 2954*c0909341SAndroid Build Coastguard Worker st1 {v4.d}[1], [\ds2], \d_strd 2955*c0909341SAndroid Build Coastguard Worker.endif 2956*c0909341SAndroid Build Coastguard Worker b.le 0f 2957*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2958*c0909341SAndroid Build Coastguard Worker b 4b 2959*c0909341SAndroid Build Coastguard Worker0: 2960*c0909341SAndroid Build Coastguard Worker ret 2961*c0909341SAndroid Build Coastguard Worker 2962*c0909341SAndroid Build Coastguard Worker80: // 8xN v 2963*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2964*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 2965*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 2966*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 2967*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 2968*c0909341SAndroid Build Coastguard Worker ld1 {v16.8b}, [\src], \s_strd 2969*c0909341SAndroid Build Coastguard Worker8: 2970*c0909341SAndroid Build Coastguard Worker ld1 {v17.8b}, [\sr2], \s_strd 2971*c0909341SAndroid Build Coastguard Worker ld1 {v18.8b}, [\src], \s_strd 2972*c0909341SAndroid Build Coastguard Worker umull v4.8h, v16.8b, v2.8b 2973*c0909341SAndroid Build Coastguard Worker umull v5.8h, v17.8b, v2.8b 2974*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v17.8b, v3.8b 2975*c0909341SAndroid Build Coastguard Worker umlal v5.8h, v18.8b, v3.8b 2976*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 2977*c0909341SAndroid Build Coastguard Worker.ifc \type, put 2978*c0909341SAndroid Build Coastguard Worker uqrshrn v4.8b, v4.8h, #4 2979*c0909341SAndroid Build Coastguard Worker uqrshrn v5.8b, v5.8h, #4 2980*c0909341SAndroid Build Coastguard Worker st1 {v4.8b}, [\dst], \d_strd 2981*c0909341SAndroid Build Coastguard Worker st1 {v5.8b}, [\ds2], \d_strd 2982*c0909341SAndroid Build Coastguard Worker.else 2983*c0909341SAndroid Build 
Coastguard Worker st1 {v4.8h}, [\dst], \d_strd 2984*c0909341SAndroid Build Coastguard Worker st1 {v5.8h}, [\ds2], \d_strd 2985*c0909341SAndroid Build Coastguard Worker.endif 2986*c0909341SAndroid Build Coastguard Worker b.le 0f 2987*c0909341SAndroid Build Coastguard Worker mov v16.8b, v18.8b 2988*c0909341SAndroid Build Coastguard Worker b 8b 2989*c0909341SAndroid Build Coastguard Worker0: 2990*c0909341SAndroid Build Coastguard Worker ret 2991*c0909341SAndroid Build Coastguard Worker 2992*c0909341SAndroid Build Coastguard Worker160: // 16xN, 32xN, ... 2993*c0909341SAndroid Build Coastguard Worker320: 2994*c0909341SAndroid Build Coastguard Worker640: 2995*c0909341SAndroid Build Coastguard Worker1280: 2996*c0909341SAndroid Build Coastguard Worker AARCH64_VALID_JUMP_TARGET 2997*c0909341SAndroid Build Coastguard Worker mov \my, \h 2998*c0909341SAndroid Build Coastguard Worker1: 2999*c0909341SAndroid Build Coastguard Worker add \ds2, \dst, \d_strd 3000*c0909341SAndroid Build Coastguard Worker add \sr2, \src, \s_strd 3001*c0909341SAndroid Build Coastguard Worker lsl \s_strd, \s_strd, #1 3002*c0909341SAndroid Build Coastguard Worker lsl \d_strd, \d_strd, #1 3003*c0909341SAndroid Build Coastguard Worker 3004*c0909341SAndroid Build Coastguard Worker ld1 {v16.16b}, [\src], \s_strd 3005*c0909341SAndroid Build Coastguard Worker2: 3006*c0909341SAndroid Build Coastguard Worker ld1 {v17.16b}, [\sr2], \s_strd 3007*c0909341SAndroid Build Coastguard Worker ld1 {v18.16b}, [\src], \s_strd 3008*c0909341SAndroid Build Coastguard Worker umull v4.8h, v16.8b, v2.8b 3009*c0909341SAndroid Build Coastguard Worker umull2 v5.8h, v16.16b, v2.16b 3010*c0909341SAndroid Build Coastguard Worker umull v6.8h, v17.8b, v2.8b 3011*c0909341SAndroid Build Coastguard Worker umull2 v7.8h, v17.16b, v2.16b 3012*c0909341SAndroid Build Coastguard Worker umlal v4.8h, v17.8b, v3.8b 3013*c0909341SAndroid Build Coastguard Worker umlal2 v5.8h, v17.16b, v3.16b 3014*c0909341SAndroid Build Coastguard Worker umlal v6.8h, 
v18.8b, v3.8b 3015*c0909341SAndroid Build Coastguard Worker umlal2 v7.8h, v18.16b, v3.16b 3016*c0909341SAndroid Build Coastguard Worker subs \h, \h, #2 3017*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3018*c0909341SAndroid Build Coastguard Worker uqrshrn v4.8b, v4.8h, #4 3019*c0909341SAndroid Build Coastguard Worker uqrshrn2 v4.16b, v5.8h, #4 3020*c0909341SAndroid Build Coastguard Worker uqrshrn v6.8b, v6.8h, #4 3021*c0909341SAndroid Build Coastguard Worker uqrshrn2 v6.16b, v7.8h, #4 3022*c0909341SAndroid Build Coastguard Worker st1 {v4.16b}, [\dst], \d_strd 3023*c0909341SAndroid Build Coastguard Worker st1 {v6.16b}, [\ds2], \d_strd 3024*c0909341SAndroid Build Coastguard Worker.else 3025*c0909341SAndroid Build Coastguard Worker st1 {v4.8h, v5.8h}, [\dst], \d_strd 3026*c0909341SAndroid Build Coastguard Worker st1 {v6.8h, v7.8h}, [\ds2], \d_strd 3027*c0909341SAndroid Build Coastguard Worker.endif 3028*c0909341SAndroid Build Coastguard Worker b.le 9f 3029*c0909341SAndroid Build Coastguard Worker mov v16.16b, v18.16b 3030*c0909341SAndroid Build Coastguard Worker b 2b 3031*c0909341SAndroid Build Coastguard Worker9: 3032*c0909341SAndroid Build Coastguard Worker subs \w, \w, #16 3033*c0909341SAndroid Build Coastguard Worker b.le 0f 3034*c0909341SAndroid Build Coastguard Worker asr \s_strd, \s_strd, #1 3035*c0909341SAndroid Build Coastguard Worker asr \d_strd, \d_strd, #1 3036*c0909341SAndroid Build Coastguard Worker msub \src, \s_strd, \xmy, \src 3037*c0909341SAndroid Build Coastguard Worker msub \dst, \d_strd, \xmy, \dst 3038*c0909341SAndroid Build Coastguard Worker sub \src, \src, \s_strd, lsl #1 3039*c0909341SAndroid Build Coastguard Worker mov \h, \my 3040*c0909341SAndroid Build Coastguard Worker add \src, \src, #16 3041*c0909341SAndroid Build Coastguard Worker.ifc \type, put 3042*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #16 3043*c0909341SAndroid Build Coastguard Worker.else 3044*c0909341SAndroid Build Coastguard Worker add \dst, \dst, #32 
.endif
        b               1b
0:
        ret
endfunc

// Jump table for the vertical-only bilinear function: one 32-bit entry per
// width class, each holding the distance from the table to the numeric
// entry label of that width (1280 first, 20 last).
jumptable \type\()_bilin_v_tbl
        .word 1280b - \type\()_bilin_v_tbl
        .word 640b  - \type\()_bilin_v_tbl
        .word 320b  - \type\()_bilin_v_tbl
        .word 160b  - \type\()_bilin_v_tbl
        .word 80b   - \type\()_bilin_v_tbl
        .word 40b   - \type\()_bilin_v_tbl
        .word 20b   - \type\()_bilin_v_tbl
endjumptable

// Bilinear filter, horizontal and vertical pass combined.
//
// Register roles as used by the code below:
//   v0, v1   8-bit weights applied to a pixel and to the pixel one byte
//            to its right (horizontal pass)
//   v2, v3   8-bit weights applied to a filtered row and to the filtered
//            row below it (vertical pass); widened to 16 bit on entry
//   x8       index of the jump table entry to dispatch through
//            (NOTE(review): computed by the caller, not visible here)
//   x9       scratch for the dispatch
// The remaining operands (dst, src, strides, w, h, my, ds2, sr2) are
// arguments of the enclosing macro.
//
// Every loop keeps the horizontally filtered previous row in v16 and
// produces two output rows per iteration, using doubled strides and a
// second pointer pair (sr2, ds2) that runs one row ahead.
//
// put:  the 16-bit result is rounded by 8 bits and narrowed to 8 bit.
// prep: the 16-bit result is rounded by 4 bits and stored as 16 bit.
function L(\type\()_bilin_hv)
        uxtl            v2.8h,   v2.8b
        uxtl            v3.8h,   v3.8b
        // Table entries are signed 32-bit offsets relative to the table.
        movrel          x9,  \type\()_bilin_hv_tbl
        ldrsw           x8,  [x9, x8, lsl #2]
        add             x9,  x9,  x8
        br              x9

20:     // 2xN hv
        AARCH64_VALID_JUMP_TARGET
.ifc \type, put
        add             \sr2, \src, \s_strd
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1

        // Horizontal filter of the first row; it seeds the vertical pass.
        ld1r            {v28.4s}, [\src], \s_strd
        ext             v29.8b,  v28.8b,  v28.8b,  #1
        umull           v16.8h,  v28.8b,  v0.8b
        umlal           v16.8h,  v29.8b,  v1.8b

2:
        ld1r            {v28.4s}, [\sr2], \s_strd
        ld1r            {v30.4s}, [\src], \s_strd
        ext             v29.8b,  v28.8b,  v28.8b,  #1
        ext             v31.8b,  v30.8b,  v30.8b,  #1
        // Pack the two new rows side by side so that a single
        // umull/umlal pair filters both of them horizontally.
        trn1            v28.4h,  v28.4h,  v30.4h
        trn1            v29.4h,  v29.4h,  v31.4h
        umull           v17.8h,  v28.8b,  v0.8b
        umlal           v17.8h,  v29.8b,  v1.8b

        // v16 = { previous row, first new row }
        // v17 = { first new row, second new row }
        trn1            v16.2s,  v16.2s,  v17.2s

        mul             v4.4h,   v16.4h,  v2.4h
        mla             v4.4h,   v17.4h,  v3.4h
        uqrshrn         v4.8b,   v4.8h,   #8
        subs            \h,  \h,  #2
        st1             {v4.h}[0], [\dst], \d_strd
        st1             {v4.h}[1], [\ds2], \d_strd
        b.le            0f
        // The second new row becomes the previous row of the next pass.
        trn2            v16.2s,  v17.2s,  v17.2s
        b               2b
0:
        ret
.endif

40:     // 4xN hv
        AARCH64_VALID_JUMP_TARGET
        add             \sr2, \src, \s_strd
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1

        // Horizontal filter of the first row; it seeds the vertical pass.
        ld1             {v28.8b}, [\src], \s_strd
        ext             v29.8b,  v28.8b,  v28.8b,  #1
        umull           v16.8h,  v28.8b,  v0.8b
        umlal           v16.8h,  v29.8b,  v1.8b

4:
        ld1             {v28.8b}, [\sr2], \s_strd
        ld1             {v30.8b}, [\src], \s_strd
        ext             v29.8b,  v28.8b,  v28.8b,  #1
        ext             v31.8b,  v30.8b,  v30.8b,  #1
        // Pack the two new rows into one register (4 pixels each).
        trn1            v28.2s,  v28.2s,  v30.2s
        trn1            v29.2s,  v29.2s,  v31.2s
        umull           v17.8h,  v28.8b,  v0.8b
        umlal           v17.8h,  v29.8b,  v1.8b

        // v16 = { previous row, first new row }
        // v17 = { first new row, second new row }
        trn1            v16.2d,  v16.2d,  v17.2d

        mul             v4.8h,   v16.8h,  v2.8h
        mla             v4.8h,   v17.8h,  v3.8h
        subs            \h,  \h,  #2
.ifc \type, put
        uqrshrn         v4.8b,   v4.8h,   #8
        st1             {v4.s}[0], [\dst], \d_strd
        st1             {v4.s}[1], [\ds2], \d_strd
.else
        urshr           v4.8h,   v4.8h,   #4
        // Low half holds the first output row, high half the second.
        st1             {v4.8b},   [\dst], \d_strd
        st1             {v4.d}[1], [\ds2], \d_strd
.endif
        b.le            0f
        // The second new row becomes the previous row of the next pass.
        trn2            v16.2d,  v17.2d,  v17.2d
        b               4b
0:
        ret

80:     // 8xN, 16xN, ... hv
160:
320:
640:
1280:
        AARCH64_VALID_JUMP_TARGET
        // Keep the row count; h is consumed by each 8 pixel wide column.
        mov             \my,  \h

1:
        add             \sr2, \src, \s_strd
        add             \ds2, \dst, \d_strd
        lsl             \s_strd, \s_strd, #1
        lsl             \d_strd, \d_strd, #1

        // 16 bytes are loaded so that the pixel right of the 8 wide
        // column is available; only the low 8 lanes are filtered.
        ld1             {v28.16b}, [\src], \s_strd
        ext             v29.16b, v28.16b, v28.16b, #1
        umull           v16.8h,  v28.8b,  v0.8b
        umlal           v16.8h,  v29.8b,  v1.8b

2:
        ld1             {v28.16b}, [\sr2], \s_strd
        ld1             {v30.16b}, [\src], \s_strd
        ext             v29.16b, v28.16b, v28.16b, #1
        ext             v31.16b, v30.16b, v30.16b, #1
        umull           v17.8h,  v28.8b,  v0.8b
        umlal           v17.8h,  v29.8b,  v1.8b
        umull           v18.8h,  v30.8b,  v0.8b
        umlal           v18.8h,  v31.8b,  v1.8b

        // Vertical pass over the row pairs (v16, v17) and (v17, v18).
        mul             v4.8h,   v16.8h,  v2.8h
        mla             v4.8h,   v17.8h,  v3.8h
        mul             v5.8h,   v17.8h,  v2.8h
        mla             v5.8h,   v18.8h,  v3.8h
        subs            \h,  \h,  #2
.ifc \type, put
        uqrshrn         v4.8b,   v4.8h,   #8
        uqrshrn         v5.8b,   v5.8h,   #8
        st1             {v4.8b}, [\dst], \d_strd
        st1             {v5.8b}, [\ds2], \d_strd
.else
        urshr           v4.8h,   v4.8h,   #4
        urshr           v5.8h,   v5.8h,   #4
        st1             {v4.8h}, [\dst], \d_strd
        st1             {v5.8h}, [\ds2], \d_strd
.endif
        b.le            9f
        mov             v16.16b, v18.16b
        b               2b
9:
        // Column finished: step 8 pixels to the right or stop.
        subs            \w,  \w,  #8
        b.le            0f
        // Undo the stride doubling, then rewind both pointers to the top.
        // src advanced by h + 2 rows (the seed row load also stepped two
        // rows), dst by h rows.
        // NOTE(review): xmy is presumably the 64-bit view of my, which
        // holds the saved row count - confirm against the macro header.
        asr             \s_strd, \s_strd, #1
        asr             \d_strd, \d_strd, #1
        msub            \src, \s_strd, \xmy, \src
        msub            \dst, \d_strd, \xmy, \dst
        sub             \src, \src, \s_strd, lsl #1
        mov             \h,  \my
        add             \src, \src, #8
.ifc \type, put
        add             \dst, \dst, #8          // 8 pixels of 1 byte
.else
        add             \dst, \dst, #16         // 8 values of 2 bytes
.endif
        b               1b
0:
        ret
endfunc

// Jump table for the combined horizontal + vertical bilinear function,
// laid out like the vertical-only table above.
jumptable \type\()_bilin_hv_tbl
        .word 1280b - \type\()_bilin_hv_tbl
        .word 640b  - \type\()_bilin_hv_tbl
        .word 320b  - \type\()_bilin_hv_tbl
        .word 160b  - \type\()_bilin_hv_tbl
        .word 80b   - \type\()_bilin_hv_tbl
        .word 40b   - \type\()_bilin_hv_tbl
        .word 20b   - \type\()_bilin_hv_tbl
endjumptable
.endm

// Instantiate the put functions: the filter combinations that involve the
// sharp filter use the 8tap code, the remaining ones the 6tap code, followed
// by the bilinear variant. The register lists bind the macro operands.
make_8tap_fn    put,  regular_sharp,  REGULAR, SHARP,   8tap
make_8tap_fn    put,  smooth_sharp,   SMOOTH,  SHARP,   8tap
make_8tap_fn    put,  sharp,          SHARP,   SHARP,   8tap
make_8tap_fn    put,  sharp_regular,  SHARP,   REGULAR, 8tap
make_8tap_fn    put,  sharp_smooth,   SHARP,   SMOOTH,  8tap
filter_fn       put,  x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, x8, x9, 10, 8tap

make_8tap_fn    put,  regular,        REGULAR, REGULAR, 6tap
make_8tap_fn    put,  regular_smooth, REGULAR, SMOOTH,  6tap
make_8tap_fn    put,  smooth,         SMOOTH,  SMOOTH,  6tap
make_8tap_fn    put,  smooth_regular, SMOOTH,  REGULAR, 6tap
filter_fn       put,  x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, x8, x9, 10, 6tap
filter_bilin_fn put,  x0, x1, x2, x3, w4, w5, w6, x6, w7, x7, x8, x9, 10

// Same set for the prep functions, with a different register assignment.
make_8tap_fn    prep, regular_sharp,  REGULAR, SHARP,   8tap
make_8tap_fn    prep, smooth_sharp,   SMOOTH,  SHARP,   8tap
make_8tap_fn    prep, sharp,          SHARP,   SHARP,   8tap
make_8tap_fn    prep, sharp_regular,  SHARP,   REGULAR, 8tap
make_8tap_fn    prep, sharp_smooth,   SHARP,   SMOOTH,  8tap
filter_fn       prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6, 8tap

make_8tap_fn    prep, regular,        REGULAR, REGULAR, 6tap
make_8tap_fn    prep, regular_smooth, REGULAR, SMOOTH,  6tap
make_8tap_fn    prep, smooth,         SMOOTH,  SMOOTH,  6tap
make_8tap_fn    prep, smooth_regular, SMOOTH,  REGULAR, 6tap
filter_fn       prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6, 6tap
filter_bilin_fn prep, x0, x7, x1, x2, w3, w4, w5, x5, w6, x6, x8, x9, 6


// Load one 8 byte filter row and advance the filter position.
//   dst  destination d register
//   src  w register holding the filter position; bits 10 and up select
//        the row. Updated in place: src += inc.
//   inc  w register holding the position step
// Expects x11 to point into the filter table; the row index is signed.
// Clobbers w13.
.macro load_filter_row dst, src, inc
        asr             w13, \src, #10
        add             \src, \src, \inc
        ldr             \dst, [x11, w13, sxtw #3]
.endm

// Horizontal warp filter for one row of 8 output pixels.
// In:    x2  = source row pointer, advanced by x3 on return
//        x3  = source stride
//        w5  = horizontal filter position of the row, advanced by w8
//        w7  = filter position step between neighbouring output pixels
//        w8  = filter position step between rows
//        x11 = filter table pointer (see load_filter_row)
//        v22 = 0x80 in every byte
// Out:   v0.8h = the 8 filter sums, not yet rounded
// Clobb: w12, w13, v1-v7, v16-v20
function warp_filter_horz_neon
        add             w12, w5,  #512          // + half of 1 << 10 to round the row index

        ld1             {v16.8b, v17.8b}, [x2], x3

        load_filter_row d0, w12, w7
        load_filter_row d1, w12, w7
        load_filter_row d2, w12, w7
        load_filter_row d3, w12, w7
        load_filter_row d4, w12, w7
        load_filter_row d5, w12, w7
        load_filter_row d6, w12, w7
        // subtract by 128 to allow using smull
        eor             v16.8b,  v16.8b,  v22.8b
        eor             v17.8b,  v17.8b,  v22.8b
        load_filter_row d7, w12, w7

        // Output pixel i multiplies filter row i with the 8 source pixels
        // that start at byte offset i.
        ext             v18.8b,  v16.8b,  v17.8b,  #1
        ext             v19.8b,  v16.8b,  v17.8b,  #2
        smull           v0.8h,   v0.8b,   v16.8b
        smull           v1.8h,   v1.8b,   v18.8b
        ext             v18.8b,  v16.8b,  v17.8b,  #3
        ext             v20.8b,  v16.8b,  v17.8b,  #4
        smull           v2.8h,   v2.8b,   v19.8b
        smull           v3.8h,   v3.8b,   v18.8b
        ext             v18.8b,  v16.8b,  v17.8b,  #5
        ext             v19.8b,  v16.8b,  v17.8b,  #6
        smull           v4.8h,   v4.8b,   v20.8b
        smull           v5.8h,   v5.8b,   v18.8b
        ext             v18.8b,  v16.8b,  v17.8b,  #7
        smull           v6.8h,   v6.8b,   v19.8b
        smull           v7.8h,   v7.8b,   v18.8b

        // Three rounds of pairwise adds reduce the 8 products of each
        // output pixel to one sum; lane i of v0 ends up holding pixel i.
        addp            v0.8h,   v0.8h,   v1.8h
        addp            v2.8h,   v2.8h,   v3.8h
        addp            v4.8h,   v4.8h,   v5.8h
        addp            v6.8h,   v6.8h,   v7.8h

        addp            v0.8h,   v0.8h,   v2.8h
        addp            v4.8h,   v4.8h,   v6.8h

        addp            v0.8h,   v0.8h,   v4.8h

        add             w5,  w5,  w8

        ret
endfunc

// void dav1d_warp_affine_8x8_8bpc_neon(
//         pixel *dst, const ptrdiff_t dst_stride,
//         const pixel *src, const ptrdiff_t src_stride,
//         const int16_t *const abcd, int mx, int my)
//
// Macro arguments:
//   t      empty for the variant that stores 8-bit pixels, "t" for the
//          variant that stores 16-bit values
//   shift  rounding shift applied after the vertical filter
// Register use inside the function:
//   x0/x1  dst pointer / dst stride (stride doubled for the t variant)
//   x2/x3  src pointer / src stride
//   x7, x8, x9, x4  the four abcd entries, sign extended
//   w5/w6  mx / my, advanced once per row
//   w10    remaining output rows
//   x11    filter table pointer
//   x15    saved return address (bl overwrites x30)
//   v22    0x80 bytes, v23 = bias added back after the vertical filter
//   v24-v31 sliding window of 8 horizontally filtered rows
.macro warp t, shift
function warp_affine_8x8\t\()_8bpc_neon, export=1
        ldr             x4,  [x4]
        sbfx            x7,  x4, #0,  #16
        sbfx            x8,  x4, #16, #16
        sbfx            x9,  x4, #32, #16
        sbfx            x4,  x4, #48, #16
        mov             w10, #8
        // Start 3 rows above and 3 pixels left of src.
        sub             x2,  x2,  x3, lsl #1
        sub             x2,  x2,  x3
        sub             x2,  x2,  #3
        // Point 64 rows into the table so negative row indices work.
        movrel          x11, X(mc_warp_filter), 64*8
        mov             x15, x30
.ifnb \t
        lsl             x1,  x1,  #1
.endif

        movi            v22.8b,  #128
        // NOTE(review): v23 presumably restores the 128 pixel bias removed
        // in the horizontal pass, scaled to the output precision - this
        // relies on the filter taps summing to 128; confirm.
.ifb \t
        movi            v23.8h,  #128
.else
        movi            v23.8h,  #8, lsl #8
.endif

        // Prime the window with the first 7 rows.
        bl              warp_filter_horz_neon
        srshr           v24.8h,  v0.8h,  #3
        bl              warp_filter_horz_neon
        srshr           v25.8h,  v0.8h,  #3
        bl              warp_filter_horz_neon
        srshr           v26.8h,  v0.8h,  #3
        bl              warp_filter_horz_neon
        srshr           v27.8h,  v0.8h,  #3
        bl              warp_filter_horz_neon
        srshr           v28.8h,  v0.8h,  #3
        bl              warp_filter_horz_neon
        srshr           v29.8h,  v0.8h,  #3
        bl              warp_filter_horz_neon
        srshr           v30.8h,  v0.8h,  #3

1:
        add             w14, w6,  #512
        bl              warp_filter_horz_neon
        srshr           v31.8h,  v0.8h,  #3

        // One vertical filter row per output column, then transposed and
        // sign extended so that vN holds tap N for all 8 columns.
        load_filter_row d0, w14, w9
        load_filter_row d1, w14, w9
        load_filter_row d2, w14, w9
        load_filter_row d3, w14, w9
        load_filter_row d4, w14, w9
        load_filter_row d5, w14, w9
        load_filter_row d6, w14, w9
        load_filter_row d7, w14, w9
        transpose_8x8b_xtl v0, v1, v2, v3, v4, v5, v6, v7, sxtl

        // This ordering of smull/smlal/smull2/smlal2 is highly
        // beneficial for Cortex A53 here.
        smull           v16.4s,  v24.4h,  v0.4h
        smlal           v16.4s,  v25.4h,  v1.4h
        smlal           v16.4s,  v26.4h,  v2.4h
        smlal           v16.4s,  v27.4h,  v3.4h
        smlal           v16.4s,  v28.4h,  v4.4h
        smlal           v16.4s,  v29.4h,  v5.4h
        smlal           v16.4s,  v30.4h,  v6.4h
        smlal           v16.4s,  v31.4h,  v7.4h
        smull2          v17.4s,  v24.8h,  v0.8h
        smlal2          v17.4s,  v25.8h,  v1.8h
        smlal2          v17.4s,  v26.8h,  v2.8h
        smlal2          v17.4s,  v27.8h,  v3.8h
        smlal2          v17.4s,  v28.8h,  v4.8h
        smlal2          v17.4s,  v29.8h,  v5.8h
        smlal2          v17.4s,  v30.8h,  v6.8h
        smlal2          v17.4s,  v31.8h,  v7.8h

        // Slide the row window up by one, interleaved with the rounding
        // and narrowing of the result.
        mov             v24.16b, v25.16b
        mov             v25.16b, v26.16b
        sqrshrn         v16.4h,  v16.4s,  #\shift
        mov             v26.16b, v27.16b
        sqrshrn2        v16.8h,  v17.4s,  #\shift
        mov             v27.16b, v28.16b
        mov             v28.16b, v29.16b
        add             v16.8h,  v16.8h,  v23.8h
.ifb \t
        sqxtun          v16.8b,  v16.8h
.endif
        mov             v29.16b, v30.16b
        mov             v30.16b, v31.16b
        subs            w10, w10, #1
.ifnb \t
        st1             {v16.8h}, [x0], x1
.else
        st1             {v16.8b}, [x0], x1
.endif

        // Neither the store nor this add touches the flags set by subs.
        add             w6,  w6,  w4
        b.gt            1b

        ret             x15
endfunc
.endm

warp  , 11
warp t, 7

// void dav1d_emu_edge_8bpc_neon(
//         const intptr_t bw, const intptr_t bh,
//         const intptr_t iw, const intptr_t ih,
//         const intptr_t x, const intptr_t y,
//         pixel *dst, const ptrdiff_t dst_stride,
//         const pixel *ref, const ptrdiff_t ref_stride)
function emu_edge_8bpc_neon, export=1
        // Builds a bw x bh block at dst from ref, replicating the nearest
        // in-picture pixel for every position outside the iw x ih image.
        //
        // Register map on entry (arguments 9 and 10 are on the stack):
        //   x0 = bw           x1 = bh
        //   x2 = iw           x3 = ih
        //   x4 = x            x5 = y
        //   x6 = dst          x7 = dst_stride
        //   [sp] = ref        [sp, #8] = ref_stride
        //
        // Register map once the setup below has run:
        //   x0  = bw          x1  = center_h      x2  = center_w
        //   x4  = left_ext    x5  = top_ext
        //   x10 = bottom_ext  x11 = right_ext
        //   x6  = dst at the first center row, x14 = copy of that pointer
        //   x8  = ref at the clipped (x, y),   x9  = ref_stride
        //   x3, x12, x13, v0, v1 are scratch.
        //
        // Only cmp/subs write the flags in this function, so the plain
        // add/sub/bic/madd/msub between a compare and its consumer are safe.
        ldp             x8,  x9,  [sp]         // x8 = ref, x9 = ref_stride

        // ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride)
        sub             x12, x3,  #1           // ih - 1
        cmp             x5,  x3
        csel            x12, x12, x5,  ge      // y >= ih ? ih - 1 : y
        bic             x12, x12, x12, asr #63 // negative -> 0
        madd            x8,  x12, x9,  x8      // ref += clipped y * ref_stride

        // ref += iclip(x, 0, iw - 1)
        sub             x13, x2,  #1           // iw - 1
        cmp             x4,  x2
        csel            x13, x13, x4,  ge      // x >= iw ? iw - 1 : x
        bic             x13, x13, x13, asr #63 // negative -> 0
        add             x8,  x8,  x13          // ref += clipped x

        // bottom_ext = iclip(y + bh - ih, 0, bh - 1)
        // (must read y from x5 before x5 is negated for top_ext)
        add             x10, x5,  x1           // y + bh
        sub             x10, x10, x3           // y + bh - ih
        sub             x12, x1,  #1           // bh - 1, shared with top_ext
        cmp             x10, x1
        csel            x10, x10, x12, lt      // min(y + bh - ih, bh - 1)
        bic             x10, x10, x10, asr #63 // max(..., 0)

        // top_ext = iclip(-y, 0, bh - 1)
        neg             x5,  x5                // -y
        bic             x5,  x5,  x5,  asr #63 // max(-y, 0)
        cmp             x5,  x1
        csel            x5,  x5,  x12, lt      // min(max(-y, 0), bh - 1)

        // right_ext = iclip(x + bw - iw, 0, bw - 1)
        // (must read x from x4 before x4 is negated for left_ext)
        add             x11, x4,  x0           // x + bw
        sub             x11, x11, x2           // x + bw - iw
        sub             x13, x0,  #1           // bw - 1, shared with left_ext
        cmp             x11, x0
        csel            x11, x11, x13, lt      // min(x + bw - iw, bw - 1)
        bic             x11, x11, x11, asr #63 // max(..., 0)

        // left_ext = iclip(-x, 0, bw - 1)
        neg             x4,  x4                // -x
        bic             x4,  x4,  x4,  asr #63 // max(-x, 0)
        cmp             x4,  x0
        csel            x4,  x4,  x13, lt      // min(max(-x, 0), bw - 1)

        // center_h = bh - top_ext - bottom_ext
        sub             x1,  x1,  x5
        sub             x1,  x1,  x10
        // center_w = bw - left_ext - right_ext (iw in x2 is dead from here)
        sub             x2,  x0,  x4
        sub             x2,  x2,  x11
        // dst += top_ext * PXSTRIDE(dst_stride)
        madd            x6,  x5,  x7,  x6
        mov             x14, x6                // remember the first center row

        // v_loop: emits the loop over the center_h rows that come from ref.
        // Per row it optionally fills left_ext bytes with the first source
        // pixel, copies center_w bytes, and optionally fills right_ext bytes
        // with the last source pixel.
        //
        // Reads:    x2, x4, x7, x9, x11
        // Updates:  x1 (row counter), x6 (dst), x8 (ref), one row per pass
        // Clobbers: x3, x12, x13, v0, v1, flags
        //
        // Every inner loop tests its counter after the store, so each body
        // runs at least once. The fills store 16 bytes per pass and the copy
        // moves 32 bytes per pass, so they can touch bytes beyond the exact
        // extents. NOTE(review): presumably both buffers are padded to allow
        // this - confirm against the callers.
.macro v_loop need_left, need_right
0:
.if \need_left
        mov             x12, x6                // out = dst
        mov             x3,  x4                // bytes left = left_ext
        ld1r            {v0.16b}, [x8]         // splat first source pixel
1:
        st1             {v0.16b}, [x12], #16
        subs            x3,  x3,  #16
        b.gt            1b
.endif
        add             x12, x6,  x4           // out = dst + left_ext
        mov             x13, x8                // in = ref
        mov             x3,  x2                // bytes left = center_w
2:
        ld1             {v0.16b, v1.16b}, [x13], #32
        st1             {v0.16b, v1.16b}, [x12], #32
        subs            x3,  x3,  #32
        b.gt            2b
.if \need_right
        add             x12, x6,  x4           // dst + left_ext
        add             x12, x12, x2           // out = dst + left_ext + center_w
        add             x3,  x8,  x2           // in + center_w
        sub             x3,  x3,  #1           // in + center_w - 1
        ld1r            {v0.16b}, [x3]         // splat last source pixel
        mov             x3,  x11               // bytes left = right_ext
3:
        st1             {v0.16b}, [x12], #16
        subs            x3,  x3,  #16
        b.gt            3b
.endif

        add             x6,  x6,  x7           // next dst row
        add             x8,  x8,  x9           // next ref row
        subs            x1,  x1,  #1           // center_h--
        b.gt            0b
.endm

        // Pick the v_loop variant matching which horizontal extensions exist.
        cbz             x4,  20f               // left_ext == 0
        cbz             x11, 30f               // right_ext == 0
        // need_left + need_right
        v_loop          1,   1
        b               50f

20:
        // !need_left
        cbz             x11, 40f               // right_ext == 0
        // !need_left + need_right
        v_loop          0,   1
        b               50f

30:
        // need_left + !need_right
        v_loop          1,   0
        b               50f

40:
        // !need_left + !need_right
        v_loop          0,   0

50:
        // x6 now points at the row just below the center rows.
        cbz             x10, 60f               // bottom_ext == 0
        // need_bottom: replicate the last center row of dst downwards,
        // one 32-byte wide strip at a time across bw.
        sub             x8,  x6,  x7           // src = dst - stride
        mov             x4,  x0                // bytes left = bw (left_ext is dead)
1:
        ld1             {v0.16b, v1.16b}, [x8], #32
        mov             x3,  x10               // rows left = bottom_ext
2:
        st1             {v0.16b, v1.16b}, [x6], x7
        subs            x3,  x3,  #1
        b.gt            2b
        msub            x6,  x7,  x10, x6      // dst -= bottom_ext * stride
        add             x6,  x6,  #32          // dst += 32
        subs            x4,  x4,  #32          // bw -= 32
        b.gt            1b

60:
        cbz             x5,  70f               // top_ext == 0
        // need_top: replicate the first center row of dst upwards,
        // one 32-byte wide strip at a time across bw.
        msub            x6,  x7,  x5,  x14     // dst = stored_dst - top_ext * stride
1:
        ld1             {v0.16b, v1.16b}, [x14], #32
        mov             x3,  x5                // rows left = top_ext
2:
        st1             {v0.16b, v1.16b}, [x6], x7
        subs            x3,  x3,  #1
        b.gt            2b
        msub            x6,  x7,  x5,  x6      // dst -= top_ext * stride
        add             x6,  x6,  #32          // dst += 32
        subs            x0,  x0,  #32          // bw -= 32
        b.gt            1b

70:
        ret
endfunc